# !pip install git+https://github.com/alberanid/imdbpy
# !pip install pandas
# !pip install numpy
# !pip install matplotlib
# !pip install seaborn
# !pip install pandas_profiling --upgrade
# !pip install plotly
# !pip install wordcloud
# !pip install Flask
# Import Dataset
# Import File from Local Drive
# from google.colab import files
# data_to_load = files.upload()
# from google.colab import drive
# drive.mount('/content/drive')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import collections
import plotly.express as px
import plotly.graph_objects as go
import nltk
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
from nltk.util import ngrams
from plotly.subplots import make_subplots
from plotly.offline import iplot, init_notebook_mode
from wordcloud import WordCloud, STOPWORDS
from pandas_profiling import ProfileReport
%matplotlib inline
# Suppress every warning category for the rest of the session.
warnings.filterwarnings("ignore")
# Download the complete NLTK data collection.
# NOTE(review): the imports above only pull in stopwords and a tokenizer, so a
# narrower download may be enough — confirm against the rest of the file.
nltk.download('all')
[nltk_data] Downloading collection 'all' [nltk_data] | [nltk_data] | Downloading package abc to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package abc is already up-to-date! [nltk_data] | Downloading package alpino to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package alpino is already up-to-date! [nltk_data] | Downloading package biocreative_ppi to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package biocreative_ppi is already up-to-date! [nltk_data] | Downloading package brown to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package brown is already up-to-date! [nltk_data] | Downloading package brown_tei to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package brown_tei is already up-to-date! [nltk_data] | Downloading package cess_cat to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package cess_cat is already up-to-date! [nltk_data] | Downloading package cess_esp to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package cess_esp is already up-to-date! [nltk_data] | Downloading package chat80 to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package chat80 is already up-to-date! [nltk_data] | Downloading package city_database to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package city_database is already up-to-date! [nltk_data] | Downloading package cmudict to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package cmudict is already up-to-date! [nltk_data] | Downloading package comparative_sentences to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package comparative_sentences is already up-to- [nltk_data] | date! [nltk_data] | Downloading package comtrans to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package comtrans is already up-to-date! 
[nltk_data] | Downloading package conll2000 to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package conll2000 is already up-to-date! [nltk_data] | Downloading package conll2002 to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package conll2002 is already up-to-date! [nltk_data] | Downloading package conll2007 to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package conll2007 is already up-to-date! [nltk_data] | Downloading package crubadan to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package crubadan is already up-to-date! [nltk_data] | Downloading package dependency_treebank to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package dependency_treebank is already up-to-date! [nltk_data] | Downloading package dolch to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package dolch is already up-to-date! [nltk_data] | Downloading package europarl_raw to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package europarl_raw is already up-to-date! [nltk_data] | Downloading package floresta to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package floresta is already up-to-date! [nltk_data] | Downloading package framenet_v15 to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package framenet_v15 is already up-to-date! [nltk_data] | Downloading package framenet_v17 to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package framenet_v17 is already up-to-date! [nltk_data] | Downloading package gazetteers to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package gazetteers is already up-to-date! [nltk_data] | Downloading package genesis to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package genesis is already up-to-date! 
[nltk_data] | Downloading package gutenberg to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package gutenberg is already up-to-date! [nltk_data] | Downloading package ieer to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package ieer is already up-to-date! [nltk_data] | Downloading package inaugural to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package inaugural is already up-to-date! [nltk_data] | Downloading package indian to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package indian is already up-to-date! [nltk_data] | Downloading package jeita to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package jeita is already up-to-date! [nltk_data] | Downloading package kimmo to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package kimmo is already up-to-date! [nltk_data] | Downloading package knbc to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package knbc is already up-to-date! [nltk_data] | Downloading package lin_thesaurus to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package lin_thesaurus is already up-to-date! [nltk_data] | Downloading package mac_morpho to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package mac_morpho is already up-to-date! [nltk_data] | Downloading package machado to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package machado is already up-to-date! [nltk_data] | Downloading package masc_tagged to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package masc_tagged is already up-to-date! [nltk_data] | Downloading package moses_sample to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package moses_sample is already up-to-date! 
[nltk_data] | Downloading package movie_reviews to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package movie_reviews is already up-to-date! [nltk_data] | Downloading package names to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package names is already up-to-date! [nltk_data] | Downloading package nombank.1.0 to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package nombank.1.0 is already up-to-date! [nltk_data] | Downloading package nps_chat to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package nps_chat is already up-to-date! [nltk_data] | Downloading package omw to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package omw is already up-to-date! [nltk_data] | Downloading package opinion_lexicon to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package opinion_lexicon is already up-to-date! [nltk_data] | Downloading package paradigms to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package paradigms is already up-to-date! [nltk_data] | Downloading package pil to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package pil is already up-to-date! [nltk_data] | Downloading package pl196x to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package pl196x is already up-to-date! [nltk_data] | Downloading package ppattach to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package ppattach is already up-to-date! [nltk_data] | Downloading package problem_reports to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package problem_reports is already up-to-date! [nltk_data] | Downloading package propbank to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package propbank is already up-to-date! 
[nltk_data] | Downloading package ptb to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package ptb is already up-to-date! [nltk_data] | Downloading package product_reviews_1 to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package product_reviews_1 is already up-to-date! [nltk_data] | Downloading package product_reviews_2 to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package product_reviews_2 is already up-to-date! [nltk_data] | Downloading package pros_cons to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package pros_cons is already up-to-date! [nltk_data] | Downloading package qc to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package qc is already up-to-date! [nltk_data] | Downloading package reuters to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package reuters is already up-to-date! [nltk_data] | Downloading package rte to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package rte is already up-to-date! [nltk_data] | Downloading package semcor to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package semcor is already up-to-date! [nltk_data] | Downloading package senseval to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package senseval is already up-to-date! [nltk_data] | Downloading package sentiwordnet to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package sentiwordnet is already up-to-date! [nltk_data] | Downloading package sentence_polarity to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package sentence_polarity is already up-to-date! [nltk_data] | Downloading package shakespeare to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package shakespeare is already up-to-date! 
[nltk_data] | Downloading package sinica_treebank to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package sinica_treebank is already up-to-date! [nltk_data] | Downloading package smultron to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package smultron is already up-to-date! [nltk_data] | Downloading package state_union to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package state_union is already up-to-date! [nltk_data] | Downloading package stopwords to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package stopwords is already up-to-date! [nltk_data] | Downloading package subjectivity to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package subjectivity is already up-to-date! [nltk_data] | Downloading package swadesh to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package swadesh is already up-to-date! [nltk_data] | Downloading package switchboard to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package switchboard is already up-to-date! [nltk_data] | Downloading package timit to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package timit is already up-to-date! [nltk_data] | Downloading package toolbox to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package toolbox is already up-to-date! [nltk_data] | Downloading package treebank to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package treebank is already up-to-date! [nltk_data] | Downloading package twitter_samples to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package twitter_samples is already up-to-date! [nltk_data] | Downloading package udhr to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package udhr is already up-to-date! 
[nltk_data] | Downloading package udhr2 to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package udhr2 is already up-to-date! [nltk_data] | Downloading package unicode_samples to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package unicode_samples is already up-to-date! [nltk_data] | Downloading package universal_treebanks_v20 to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package universal_treebanks_v20 is already up-to- [nltk_data] | date! [nltk_data] | Downloading package verbnet to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package verbnet is already up-to-date! [nltk_data] | Downloading package verbnet3 to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package verbnet3 is already up-to-date! [nltk_data] | Downloading package webtext to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package webtext is already up-to-date! [nltk_data] | Downloading package wordnet to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package wordnet is already up-to-date! [nltk_data] | Downloading package wordnet_ic to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package wordnet_ic is already up-to-date! [nltk_data] | Downloading package words to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package words is already up-to-date! [nltk_data] | Downloading package ycoe to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package ycoe is already up-to-date! [nltk_data] | Downloading package rslp to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package rslp is already up-to-date! [nltk_data] | Downloading package maxent_treebank_pos_tagger to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package maxent_treebank_pos_tagger is already up- [nltk_data] | to-date! 
[nltk_data] | Downloading package universal_tagset to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package universal_tagset is already up-to-date! [nltk_data] | Downloading package maxent_ne_chunker to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package maxent_ne_chunker is already up-to-date! [nltk_data] | Downloading package punkt to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package punkt is already up-to-date! [nltk_data] | Downloading package book_grammars to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package book_grammars is already up-to-date! [nltk_data] | Downloading package sample_grammars to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package sample_grammars is already up-to-date! [nltk_data] | Downloading package spanish_grammars to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package spanish_grammars is already up-to-date! [nltk_data] | Downloading package basque_grammars to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package basque_grammars is already up-to-date! [nltk_data] | Downloading package large_grammars to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package large_grammars is already up-to-date! [nltk_data] | Downloading package tagsets to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package tagsets is already up-to-date! [nltk_data] | Downloading package snowball_data to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package snowball_data is already up-to-date! [nltk_data] | Downloading package bllip_wsj_no_aux to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package bllip_wsj_no_aux is already up-to-date! [nltk_data] | Downloading package word2vec_sample to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... 
[nltk_data] | Package word2vec_sample is already up-to-date! [nltk_data] | Downloading package panlex_swadesh to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package panlex_swadesh is already up-to-date! [nltk_data] | Downloading package mte_teip5 to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package mte_teip5 is already up-to-date! [nltk_data] | Downloading package averaged_perceptron_tagger to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package averaged_perceptron_tagger is already up- [nltk_data] | to-date! [nltk_data] | Downloading package averaged_perceptron_tagger_ru to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package averaged_perceptron_tagger_ru is already [nltk_data] | up-to-date! [nltk_data] | Downloading package perluniprops to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package perluniprops is already up-to-date! [nltk_data] | Downloading package nonbreaking_prefixes to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package nonbreaking_prefixes is already up-to-date! [nltk_data] | Downloading package vader_lexicon to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package vader_lexicon is already up-to-date! [nltk_data] | Downloading package porter_test to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package porter_test is already up-to-date! [nltk_data] | Downloading package wmt15_eval to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package wmt15_eval is already up-to-date! [nltk_data] | Downloading package mwa_ppdb to [nltk_data] | C:\Users\pawan\AppData\Roaming\nltk_data... [nltk_data] | Package mwa_ppdb is already up-to-date! [nltk_data] | [nltk_data] Done downloading collection all
True
# Folder that holds the dataset (the Colab/Drive location is kept for reference).
# path = '/content/drive/MyDrive/Files/'
path = 'C:\\Users\\pawan\\OneDrive\\Desktop\\ott\\Data\\'
# Build the full file name first, then read the TV-show catalogue from it.
tvshows_csv = path + 'otttvshows.csv'
df_tvshows = pd.read_csv(tvshows_csv)
# Preview the first rows of the freshly loaded frame.
df_tvshows.head()
| ID | Title | Year | Age | IMDb | Rotten Tomatoes | Directors | Cast | Genres | Country | Language | Plotline | Runtime | Kind | Seasons | Netflix | Hulu | Prime Video | Disney+ | Type | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Snowpiercer | 2013 | 18+ | 6.9 | 94% | NaN | Daveed Diggs,Iddo Goldberg,Mickey Sumner,Aliso... | Action,Drama,Sci-Fi,Thriller | United States | English | Set seven years after the world has become a f... | 60.0 | tv series | 3.0 | 1 | 0 | 0 | 0 | 1 |
| 1 | 2 | Philadelphia | 1993 | 13+ | 8.8 | 80% | NaN | Charlie Day,Glenn Howerton,Rob McElhenney,Kait... | Comedy | United States | English | The gang, 5 raging alcoholic, narcissists run ... | 22.0 | tv series | 18.0 | 1 | 0 | 0 | 0 | 1 |
| 2 | 3 | Roma | 2018 | 18+ | 8.7 | 93% | NaN | Kevin McKidd,Ray Stevenson,Polly Walker,Kerry ... | Action,Drama,History,Romance,War | United Kingdom,United States | English | In this British historical drama, the turbulen... | 52.0 | tv series | 2.0 | 1 | 0 | 0 | 0 | 1 |
| 3 | 4 | Amy | 2015 | 18+ | 7.0 | 87% | NaN | Amy Brenneman,Richard T. Jones,Jessica Tuck,Ma... | Drama | United States | English | A family drama focused on three generations of... | 60.0 | tv series | 6.0 | 1 | 0 | 1 | 1 | 1 |
| 4 | 5 | The Young Offenders | 2016 | NaN | 8.0 | 100% | NaN | Alex Murphy,Chris Walley,Hilary Rose,Dominic M... | Comedy | United Kingdom,Ireland | English | NaN | 30.0 | tv series | 3.0 | 1 | 0 | 0 | 0 | 1 |
# profile = ProfileReport(df_tvshows)
# profile
def data_investigate(df):
    """Print a structural and missing-value summary of ``df`` and plot its nulls.

    Written to stdout, in order: the row count, the column count, the column
    index, the per-column dtypes, the null count of every column that has at
    least one null, and the percentage of nulls for every column. Afterwards a
    seaborn heat map of the null mask is drawn on a new 10x10 matplotlib
    figure and shown.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to inspect. It is only read, never modified.

    Returns
    -------
    None
    """
    divider = '**'*25
    n_rows, n_cols = df.shape[0], df.shape[1]

    # The null mask feeds the counts, the percentages and the heat map.
    null_mask = df.isnull()
    null_counts = null_mask.sum()

    print('No of Rows : ', n_rows)
    print('No of Coloums : ', n_cols)
    print(divider)
    print('Colums Names : \n', df.columns)
    print(divider)
    print('Datatype of Columns : \n', df.dtypes)
    print(divider)
    print('Missing Values : ')
    # Only columns that actually contain nulls are listed here.
    print(null_counts[null_counts > 0])
    print(divider)
    print('Missing vaules %age wise :\n')
    print((100*(null_counts/len(df.index))))
    print(divider)
    print('Pictorial Representation : ')
    plt.figure(figsize = (10, 10))
    # One heat-map row per frame row; row labels and colour bar are switched off.
    sns.heatmap(null_mask, yticklabels = False, cbar = False)
    plt.show()
# Summarise the frame as loaded, before any cleaning is applied.
data_investigate(df_tvshows)
No of Rows : 5432
No of Coloums : 20
**************************************************
Colums Names :
Index(['ID', 'Title', 'Year', 'Age', 'IMDb', 'Rotten Tomatoes', 'Directors',
'Cast', 'Genres', 'Country', 'Language', 'Plotline', 'Runtime', 'Kind',
'Seasons', 'Netflix', 'Hulu', 'Prime Video', 'Disney+', 'Type'],
dtype='object')
**************************************************
Datatype of Columns :
ID int64
Title object
Year int64
Age object
IMDb float64
Rotten Tomatoes object
Directors object
Cast object
Genres object
Country object
Language object
Plotline object
Runtime float64
Kind object
Seasons float64
Netflix int64
Hulu int64
Prime Video int64
Disney+ int64
Type int64
dtype: object
**************************************************
Missing Values :
Age 1954
IMDb 556
Rotten Tomatoes 4194
Directors 5158
Cast 486
Genres 323
Country 549
Language 638
Plotline 2493
Runtime 1410
Seasons 679
dtype: int64
**************************************************
Missing vaules %age wise :
ID 0.000000
Title 0.000000
Year 0.000000
Age 35.972018
IMDb 10.235641
Rotten Tomatoes 77.209131
Directors 94.955817
Cast 8.946981
Genres 5.946244
Country 10.106775
Language 11.745214
Plotline 45.894698
Runtime 25.957290
Kind 0.000000
Seasons 12.500000
Netflix 0.000000
Hulu 0.000000
Prime Video 0.000000
Disney+ 0.000000
Type 0.000000
dtype: float64
**************************************************
Pictorial Representation :
# Cleaning pass over df_tvshows: every column that contains nulls is filled
# (mostly with the text placeholder "NA"), rating columns are normalised, and a
# 'Service Provider' label is derived from the platform flags.

# ID
# df_tvshows = df_tvshows.drop(['ID'], axis = 1)

# Age
# Unrated Disney+ titles get the rating '13'. The flag comparison is
# parenthesised because `&` binds tighter than `==`; without the parentheses
# the mask was only right because the flag holds 0/1.
df_tvshows.loc[df_tvshows['Age'].isnull() & (df_tvshows['Disney+'] == 1), "Age"] = '13'
# df_tvshows.fillna({'Age' : 18}, inplace = True)
# Every remaining unrated title is marked 'NR'.
df_tvshows.fillna({'Age' : 'NR'}, inplace = True)
# Strip the '+' suffix and turn 'all' into '0'. The result is assigned back to
# the column: `df[col].replace(..., inplace=True)` operates on a selected
# Series and does not write through to the frame under pandas Copy-on-Write.
df_tvshows['Age'] = df_tvshows['Age'].replace(
    {'all': '0', '7+': '7', '13+': '13', '16+': '16', '18+': '18'}
)
# df_tvshows['Age'] = df_tvshows['Age'].astype(int)

# IMDb
# df_tvshows.fillna({'IMDb' : df_tvshows['IMDb'].mean()}, inplace = True)
# df_tvshows.fillna({'IMDb' : df_tvshows['IMDb'].median()}, inplace = True)
# The string placeholder turns this float column into an object column.
df_tvshows.fillna({'IMDb' : "NA"}, inplace = True)

# Rotten Tomatoes
# Drop the literal '%' and convert the non-null scores to integers. Assigning
# the shorter Series back aligns on the index, so missing scores stay NaN until
# the fill below.
df_tvshows['Rotten Tomatoes'] = df_tvshows['Rotten Tomatoes'][df_tvshows['Rotten Tomatoes'].notnull()].str.replace('%', '', regex = False).astype(int)
# df_tvshows.fillna({'Rotten Tomatoes' : df_tvshows['Rotten Tomatoes'].mean()}, inplace = True)
# df_tvshows.fillna({'Rotten Tomatoes' : df_tvshows['Rotten Tomatoes'].median()}, inplace = True)
# df_tvshows['Rotten Tomatoes'] = df_tvshows['Rotten Tomatoes'].astype(int)
df_tvshows.fillna({'Rotten Tomatoes' : "NA"}, inplace = True)

# Directors
# df_tvshows = df_tvshows.drop(['Directors'], axis = 1)
df_tvshows.fillna({'Directors' : "NA"}, inplace = True)

# Cast
df_tvshows.fillna({'Cast' : "NA"}, inplace = True)

# Genres
df_tvshows.fillna({'Genres': "NA"}, inplace = True)

# Country
df_tvshows.fillna({'Country': "NA"}, inplace = True)

# Language
df_tvshows.fillna({'Language': "NA"}, inplace = True)

# Plotline
df_tvshows.fillna({'Plotline': "NA"}, inplace = True)

# Runtime
# df_tvshows.fillna({'Runtime' : df_tvshows['Runtime'].mean()}, inplace = True)
# df_tvshows['Runtime'] = df_tvshows['Runtime'].astype(int)
df_tvshows.fillna({'Runtime' : "NA"}, inplace = True)

# Kind
# df_tvshows.fillna({'Kind': "NA"}, inplace = True)

# Type
# df_tvshows.fillna({'Type': "NA"}, inplace = True)
# df_tvshows = df_tvshows.drop(['Type'], axis = 1)

# Seasons
# df_tvshows.fillna({'Seasons': 1}, inplace = True)
df_tvshows.fillna({'Seasons': "NA"}, inplace = True)
# df_tvshows = df_tvshows.drop(['Seasons'], axis = 1)
# df_tvshows['Seasons'] = df_tvshows['Seasons'].astype(int)
# df_tvshows.fillna({'Seasons' : df_tvshows['Seasons'].mean()}, inplace = True)
# df_tvshows['Seasons'] = df_tvshows['Seasons'].astype(int)

# Service Provider
# idxmax returns the first column, in the order listed here, that holds the row
# maximum. A title available on several platforms is therefore labelled with
# only the first of them, and a title with all flags 0 is labelled 'Netflix'.
df_tvshows['Service Provider'] = df_tvshows.loc[:, ['Netflix', 'Prime Video', 'Disney+', 'Hulu']].idxmax(axis = 1)
# df_tvshows.drop(['Netflix','Prime Video','Disney+','Hulu'], axis = 1)

# Removing Duplicate and Missing Entries
# Safety net: drop any row that still has a null, then exact duplicate rows.
df_tvshows.dropna(how = 'any', inplace = True)
df_tvshows.drop_duplicates(inplace = True)

# Re-run the summary on the cleaned frame.
data_investigate(df_tvshows)
No of Rows : 5432
No of Coloums : 21
**************************************************
Colums Names :
Index(['ID', 'Title', 'Year', 'Age', 'IMDb', 'Rotten Tomatoes', 'Directors',
'Cast', 'Genres', 'Country', 'Language', 'Plotline', 'Runtime', 'Kind',
'Seasons', 'Netflix', 'Hulu', 'Prime Video', 'Disney+', 'Type',
'Service Provider'],
dtype='object')
**************************************************
Datatype of Columns :
ID int64
Title object
Year int64
Age object
IMDb object
Rotten Tomatoes object
Directors object
Cast object
Genres object
Country object
Language object
Plotline object
Runtime object
Kind object
Seasons object
Netflix int64
Hulu int64
Prime Video int64
Disney+ int64
Type int64
Service Provider object
dtype: object
**************************************************
Missing Values :
Series([], dtype: int64)
**************************************************
Missing vaules %age wise :
ID 0.0
Title 0.0
Year 0.0
Age 0.0
IMDb 0.0
Rotten Tomatoes 0.0
Directors 0.0
Cast 0.0
Genres 0.0
Country 0.0
Language 0.0
Plotline 0.0
Runtime 0.0
Kind 0.0
Seasons 0.0
Netflix 0.0
Hulu 0.0
Prime Video 0.0
Disney+ 0.0
Type 0.0
Service Provider 0.0
dtype: float64
**************************************************
Pictorial Representation :
# Preview the first rows of the cleaned frame.
df_tvshows.head()
| ID | Title | Year | Age | IMDb | Rotten Tomatoes | Directors | Cast | Genres | Country | ... | Plotline | Runtime | Kind | Seasons | Netflix | Hulu | Prime Video | Disney+ | Type | Service Provider | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Snowpiercer | 2013 | 18 | 6.9 | 94 | NA | Daveed Diggs,Iddo Goldberg,Mickey Sumner,Aliso... | Action,Drama,Sci-Fi,Thriller | United States | ... | Set seven years after the world has become a f... | 60 | tv series | 3 | 1 | 0 | 0 | 0 | 1 | Netflix |
| 1 | 2 | Philadelphia | 1993 | 13 | 8.8 | 80 | NA | Charlie Day,Glenn Howerton,Rob McElhenney,Kait... | Comedy | United States | ... | The gang, 5 raging alcoholic, narcissists run ... | 22 | tv series | 18 | 1 | 0 | 0 | 0 | 1 | Netflix |
| 2 | 3 | Roma | 2018 | 18 | 8.7 | 93 | NA | Kevin McKidd,Ray Stevenson,Polly Walker,Kerry ... | Action,Drama,History,Romance,War | United Kingdom,United States | ... | In this British historical drama, the turbulen... | 52 | tv series | 2 | 1 | 0 | 0 | 0 | 1 | Netflix |
| 3 | 4 | Amy | 2015 | 18 | 7 | 87 | NA | Amy Brenneman,Richard T. Jones,Jessica Tuck,Ma... | Drama | United States | ... | A family drama focused on three generations of... | 60 | tv series | 6 | 1 | 0 | 1 | 1 | 1 | Netflix |
| 4 | 5 | The Young Offenders | 2016 | NR | 8 | 100 | NA | Alex Murphy,Chris Walley,Hilary Rose,Dominic M... | Comedy | United Kingdom,Ireland | ... | NA | 30 | tv series | 3 | 1 | 0 | 0 | 0 | 1 | Netflix |
5 rows × 21 columns
# Summary statistics of the cleaned frame; columns filled with the "NA" string
# are object-typed by now.
df_tvshows.describe()
| ID | Year | Netflix | Hulu | Prime Video | Disney+ | Type | |
|---|---|---|---|---|---|---|---|
| count | 5432.000000 | 5432.000000 | 5432.000000 | 5432.000000 | 5432.000000 | 5432.000000 | 5432.0 |
| mean | 2716.500000 | 2010.668446 | 0.341311 | 0.293999 | 0.403351 | 0.033689 | 1.0 |
| std | 1568.227662 | 11.726176 | 0.474193 | 0.455633 | 0.490615 | 0.180445 | 0.0 |
| min | 1.000000 | 1901.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.0 |
| 25% | 1358.750000 | 2009.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.0 |
| 50% | 2716.500000 | 2014.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.0 |
| 75% | 4074.250000 | 2017.000000 | 1.000000 | 1.000000 | 1.000000 | 0.000000 | 1.0 |
| max | 5432.000000 | 2020.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.0 |
# Pairwise correlation of the numeric columns. The frame also holds text
# columns (titles, genres, "NA" placeholders), which a bare `corr()` rejects on
# recent pandas versions, so the numeric subset is selected explicitly.
df_tvshows.select_dtypes(include = 'number').corr()
| ID | Year | Netflix | Hulu | Prime Video | Disney+ | Type | |
|---|---|---|---|---|---|---|---|
| ID | 1.000000 | -0.031346 | -0.646330 | 0.034293 | 0.441264 | 0.195409 | NaN |
| Year | -0.031346 | 1.000000 | 0.222316 | -0.065807 | -0.198675 | -0.022741 | NaN |
| Netflix | -0.646330 | 0.222316 | 1.000000 | -0.366515 | -0.515086 | -0.119344 | NaN |
| Hulu | 0.034293 | -0.065807 | -0.366515 | 1.000000 | -0.377374 | -0.075701 | NaN |
| Prime Video | 0.441264 | -0.198675 | -0.515086 | -0.377374 | 1.000000 | -0.151442 | NaN |
| Disney+ | 0.195409 | -0.022741 | -0.119344 | -0.075701 | -0.151442 | 1.000000 | NaN |
| Type | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
# df_tvshows.sort_values('Year', ascending = True)
# df_tvshows.sort_values('IMDb', ascending = False)
# df_tvshows.to_csv(path_or_buf= '/content/drive/MyDrive/Files/updated_otttvshows.csv', index = False)
# path = '/content/drive/MyDrive/Files/'
# udf_tvshows = pd.read_csv(path + 'updated_otttvshows.csv')
# udf_tvshows
# df_netflix_tvshows = df_tvshows.loc[(df_tvshows['Netflix'] > 0)]
# df_hulu_tvshows = df_tvshows.loc[(df_tvshows['Hulu'] > 0)]
# df_prime_video_tvshows = df_tvshows.loc[(df_tvshows['Prime Video'] > 0)]
# df_disney_tvshows = df_tvshows.loc[(df_tvshows['Disney+'] > 0)]
# Titles that are exclusive to one platform: each row's four availability flags
# are compared, column by column, against the pattern for that platform.
_platform_flags = df_tvshows[['Netflix', 'Hulu', 'Prime Video', 'Disney+']]
df_netflix_only_tvshows = df_tvshows[_platform_flags.eq([1, 0, 0, 0]).all(axis = 1)]
df_hulu_only_tvshows = df_tvshows[_platform_flags.eq([0, 1, 0, 0]).all(axis = 1)]
df_prime_video_only_tvshows = df_tvshows[_platform_flags.eq([0, 0, 1, 0]).all(axis = 1)]
df_disney_only_tvshows = df_tvshows[_platform_flags.eq([0, 0, 0, 1]).all(axis = 1)]
df_tvshows_countries = df_tvshows.copy()
df_tvshows_countries.drop(df_tvshows_countries.loc[df_tvshows_countries['Country'] == "NA"].index, inplace = True)
# df_tvshows_countries = df_tvshows_countries[df_tvshows_countries.Country != "NA"]
# df_tvshows_countries['Country'] = df_tvshows_countries['Country'].astype(str)
df_tvshows_count_countries = df_tvshows_countries.copy()
df_tvshows_country = df_tvshows_countries.copy()
# Map every distinct Country string to the number of comma-separated
# countries it lists; the "NA" placeholder counts as zero countries.
# Iterating over unique values avoids recomputing the count for repeated
# strings while producing the same dict (keys in first-appearance order).
countries = {
    country_list: 0 if country_list == "NA" else len(country_list.split(','))
    for country_list in df_tvshows_count_countries['Country'].dropna().unique()
}

# Add this information to our dataframe as a new column.
# Rows with a missing Country are not in the dict, so map() yields NaN for
# them; treat them as zero countries (like "NA") instead of letting
# astype(int) raise on NaN.
df_tvshows_count_countries['Number of Countries'] = (
    df_tvshows_count_countries['Country'].map(countries).fillna(0).astype(int)
)
df_tvshows_mixed_countries = df_tvshows_count_countries.copy()

# One frame per streaming platform, holding the shows flagged (== 1) as
# available on that platform. A show on several platforms appears in several frames.
netflix_countries_tvshows = df_tvshows_count_countries[df_tvshows_count_countries['Netflix'].eq(1)]
hulu_countries_tvshows = df_tvshows_count_countries[df_tvshows_count_countries['Hulu'].eq(1)]
prime_video_countries_tvshows = df_tvshows_count_countries[df_tvshows_count_countries['Prime Video'].eq(1)]
disney_countries_tvshows = df_tvshows_count_countries[df_tvshows_count_countries['Disney+'].eq(1)]
# Pairwise correlation of the numeric/boolean columns. Selecting them
# explicitly keeps the call working on pandas versions where corr() no
# longer silently skips text columns.
corr = df_tvshows_count_countries.select_dtypes(include = ['number', 'bool']).corr()

# A single figure/axes pair; the previous extra plt.figure() call only
# produced an empty figure.
fig, ax = plt.subplots(figsize = (10, 8))

# Annotated heat map drawn on the axes created above. seaborn already labels
# each cell centre with the column/row name, so no manual xticks/yticks are
# set (integer tick positions would sit on the cell edges).
sns.heatmap(corr, cmap = 'magma', annot = True, fmt = ".2f", ax = ax)

# show plot
plt.show()
<Figure size 720x720 with 0 Axes>
# All shows ordered from the largest to the smallest 'Number of Countries',
# renumbered 0..n-1 so that row 0 is the top entry.
df_countries_most_tvshows = (
    df_tvshows_count_countries
    .sort_values(by = 'Number of Countries', ascending = False)
    .reset_index(drop = True)
)

print('\nTV Shows with Highest Ever Number of Countries are : \n')
df_countries_most_tvshows.head(5)
TV Shows with Highest Ever Number of Countries are :
| ID | Title | Year | Age | IMDb | Rotten Tomatoes | Directors | Cast | Genres | Country | ... | Runtime | Kind | Seasons | Netflix | Hulu | Prime Video | Disney+ | Type | Service Provider | Number of Countries | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5371 | Bonkers | 1993 | 7 | 6.8 | NA | NA | Jim Cummings,Earl Boen,Frank Welker,Jeff Benne... | Animation,Action,Adventure,Comedy,Crime,Family | United States,Hong Kong,South Korea,France,Can... | ... | 30 | tv series | 1 | 0 | 0 | 0 | 1 | 1 | Disney+ | 11 |
| 1 | 3833 | Trapped | 2015 | 16 | 8.1 | NA | NA | Ólafur Darri Ólafsson,Ilmur Kristjánsdóttir,In... | Crime,Drama,Mystery,Thriller | Iceland,Denmark,Finland,Sweden,Norway,Germany,... | ... | 60 | tv series | 3 | 0 | 0 | 1 | 0 | 1 | Prime Video | 8 |
| 2 | 1401 | Oggy and the Cockroaches | 1998 | 7 | 7.3 | NA | NA | Hugues Le Bars,Michel Elias | Animation,Action,Comedy,Family | France,Canada,Philippines,Vietnam,South Korea,... | ... | 8 | tv series | 7 | 1 | 0 | 0 | 0 | 1 | Netflix | 7 |
| 3 | 345 | Scarlett | 2016 | 13 | 6.5 | NA | NA | Joanne Whalley,Timothy Dalton,Annabeth Gish,Ju... | Drama,Romance | France,United States,Germany,Italy,United King... | ... | 360 | tv series | 1 | 0 | 0 | 1 | 0 | 1 | Prime Video | 7 |
| 4 | 2073 | YooHoo & Friends (US) | 2012 | 0 | 6.2 | NA | NA | Sang Hyun Uhm,Jeon Sook Kyung,Lee Won Chan,Sin... | Animation,Comedy,Family | United States,South Korea,China,France,Japan,C... | ... | NA | tv series | 2 | 1 | 0 | 0 | 0 | 1 | Netflix | 7 |
5 rows × 22 columns
# Horizontal bar chart of the first 15 rows of the descending ranking.
_top15 = df_countries_most_tvshows.head(15)
fig = px.bar(
    x = _top15['Number of Countries'],
    y = _top15['Title'],
    color = _top15['Number of Countries'],
    color_continuous_scale = 'Teal_r',
    labels = { 'y' : 'TV Shows', 'x' : 'Number of Countries'},
    title = 'TV Shows with Highest Number of Countries : All Platforms',
)
fig.update_layout(plot_bgcolor = 'white')
fig.show()
# All shows ordered from the smallest to the largest 'Number of Countries',
# renumbered 0..n-1 so that row 0 is the bottom entry.
df_countries_least_tvshows = (
    df_tvshows_count_countries
    .sort_values(by = 'Number of Countries', ascending = True)
    .reset_index(drop = True)
)

print('\nTV Shows with Lowest Ever Number of Countries are : \n')
df_countries_least_tvshows.head(5)
TV Shows with Lowest Ever Number of Countries are :
| ID | Title | Year | Age | IMDb | Rotten Tomatoes | Directors | Cast | Genres | Country | ... | Runtime | Kind | Seasons | Netflix | Hulu | Prime Video | Disney+ | Type | Service Provider | Number of Countries | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Snowpiercer | 2013 | 18 | 6.9 | 94 | NA | Daveed Diggs,Iddo Goldberg,Mickey Sumner,Aliso... | Action,Drama,Sci-Fi,Thriller | United States | ... | 60 | tv series | 3 | 1 | 0 | 0 | 0 | 1 | Netflix | 1 |
| 1 | 3376 | BEM | 2019 | NR | 6 | NA | NA | Felecia Angelle,Dani Chambers,Aaron Dismuke,Ja... | Animation,Horror | Japan | ... | NA | tv series | 1 | 0 | 1 | 0 | 0 | 1 | Hulu | 1 |
| 2 | 3374 | Barefoot Contessa: Back to Basics | 2002 | 0 | 7.7 | NA | NA | NA | Reality-TV | United States | ... | NA | tv series | 10 | 0 | 1 | 0 | 0 | 1 | Hulu | 1 |
| 3 | 3373 | Get Ace | 2014 | 0 | 7.5 | NA | NA | Jeffery Richards,David Myles Brown,Lyall Brook... | Animation,Fantasy,Sci-Fi | Australia | ... | 12 | tv series | 2 | 0 | 1 | 0 | 0 | 1 | Hulu | 1 |
| 4 | 3371 | Murder on the Internet | 2017 | NR | 6.8 | NA | NA | Emma Kenny,Siobhan McFadyen,Sam Meadows | Documentary,Crime | United Kingdom | ... | 44 | tv series | 1 | 0 | 1 | 1 | 0 | 1 | Prime Video | 1 |
5 rows × 22 columns
# Horizontal bar chart of the first 15 rows of the ascending ranking.
_bottom15 = df_countries_least_tvshows.head(15)
fig = px.bar(
    x = _bottom15['Number of Countries'],
    y = _bottom15['Title'],
    color = _bottom15['Number of Countries'],
    color_continuous_scale = 'Teal_r',
    labels = { 'y' : 'TV Shows', 'x' : 'Number of Countries'},
    title = 'TV Shows with Lowest Number of Countries : All Platforms',
)
fig.update_layout(plot_bgcolor = 'white')
fig.show()
# Values reported below: how many distinct counts exist, the distinct counts
# in descending order, and the first row of each ranking with its extreme.
_distinct_total = df_tvshows_count_countries['Number of Countries'].unique().shape[0]
_distinct_desc = df_tvshows_count_countries.sort_values(by = 'Number of Countries', ascending = False)['Number of Countries'].unique()
_most_title = df_countries_most_tvshows['Title'][0]
_most_value = df_countries_most_tvshows['Number of Countries'].max()
_least_title = df_countries_least_tvshows['Title'][0]
_least_value = df_countries_least_tvshows['Number of Countries'].min()

print(f'''
Total '{_distinct_total}' unique Number of Countries s were Given, They were Like this,\n
{_distinct_desc}\n
The Highest Number of Countries Ever Any TV Show Got is '{_most_title}' : '{_most_value}'\n
The Lowest Number of Countries Ever Any TV Show Got is '{_least_title}' : '{_least_value}'\n
''')
Total '9' unique Number of Countries s were Given, They were Like this,
[11 8 7 6 5 4 3 2 1]
The Highest Number of Countries Ever Any TV Show Got is 'Bonkers' : '11'
The Lowest Number of Countries Ever Any TV Show Got is 'Snowpiercer' : '1'
# Netflix rows of the overall rankings, keeping the ranking order and
# renumbering from 0 so that row 0 is the platform's extreme entry.
netflix_countries_most_tvshows = df_countries_most_tvshows[df_countries_most_tvshows['Netflix'].eq(1)].reset_index(drop = True)
netflix_countries_least_tvshows = df_countries_least_tvshows[df_countries_least_tvshows['Netflix'].eq(1)].reset_index(drop = True)
netflix_countries_most_tvshows.head(5)
| ID | Title | Year | Age | IMDb | Rotten Tomatoes | Directors | Cast | Genres | Country | ... | Runtime | Kind | Seasons | Netflix | Hulu | Prime Video | Disney+ | Type | Service Provider | Number of Countries | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1401 | Oggy and the Cockroaches | 1998 | 7 | 7.3 | NA | NA | Hugues Le Bars,Michel Elias | Animation,Action,Comedy,Family | France,Canada,Philippines,Vietnam,South Korea,... | ... | 8 | tv series | 7 | 1 | 0 | 0 | 0 | 1 | Netflix | 7 |
| 1 | 2073 | YooHoo & Friends (US) | 2012 | 0 | 6.2 | NA | NA | Sang Hyun Uhm,Jeon Sook Kyung,Lee Won Chan,Sin... | Animation,Comedy,Family | United States,South Korea,China,France,Japan,C... | ... | NA | tv series | 2 | 1 | 0 | 0 | 0 | 1 | Netflix | 7 |
| 2 | 52 | Abominable Christmas | 2012 | NR | 5.3 | NA | Chad Van De Keere | Ariel Winter,Drake Bell,Emilio Estevez,Isabell... | Animation,Short,Adventure,Comedy,Family | United States,India,Canada,Sri Lanka,South Afr... | ... | 43 | tv series | NA | 1 | 0 | 0 | 0 | 1 | Netflix | 6 |
| 3 | 1359 | Ultimate Beastmaster | 2017 | 7 | 7.3 | NA | NA | Tiki Barber,Rafinha Bastos,Luis Ernesto Franco... | Game-Show,Reality-TV | United States,Brazil,South Korea,Mexico,Japan,... | ... | 55 | tv series | 3 | 1 | 0 | 0 | 0 | 1 | Netflix | 6 |
| 4 | 798 | Frozen Planet | 2011 | 7 | 9 | NA | NA | David Attenborough,Alec Baldwin,Chadden Hunter... | Documentary | United Kingdom,United States,Spain,Germany,Gre... | ... | 333 | tv series | 1 | 1 | 0 | 0 | 0 | 1 | Netflix | 6 |
5 rows × 22 columns
# Two horizontal bar charts for Netflix: the first 15 rows of the descending
# ranking, then the first 15 rows of the ascending ranking.
for _ranking, _chart_title in (
    (netflix_countries_most_tvshows, 'TV Shows with Highest Number of Countries : Netflix'),
    (netflix_countries_least_tvshows, 'TV Shows with Lowest Number of Countries : Netflix'),
):
    _first15 = _ranking.head(15)
    fig = px.bar(
        x = _first15['Number of Countries'],
        y = _first15['Title'],
        color = _first15['Number of Countries'],
        color_continuous_scale = 'Teal_r',
        labels = { 'y' : 'TV Shows', 'x' : 'Number of Countries'},
        title = _chart_title,
    )
    fig.update_layout(plot_bgcolor = 'white')
    fig.show()
# Hulu rows of the overall rankings, keeping the ranking order and
# renumbering from 0 so that row 0 is the platform's extreme entry.
hulu_countries_most_tvshows = df_countries_most_tvshows[df_countries_most_tvshows['Hulu'].eq(1)].reset_index(drop = True)
hulu_countries_least_tvshows = df_countries_least_tvshows[df_countries_least_tvshows['Hulu'].eq(1)].reset_index(drop = True)
hulu_countries_most_tvshows.head(5)
| ID | Title | Year | Age | IMDb | Rotten Tomatoes | Directors | Cast | Genres | Country | ... | Runtime | Kind | Seasons | Netflix | Hulu | Prime Video | Disney+ | Type | Service Provider | Number of Countries | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2465 | The Amazing World of Gumball | 2011 | 7 | 8.2 | NA | NA | Dan Russell,Teresa Gallagher,Kerry Shale,Kyla ... | Animation,Adventure,Comedy,Family,Fantasy | United Kingdom,Ireland,United States,Germany,J... | ... | 11 | tv series | 6 | 0 | 1 | 0 | 0 | 1 | Hulu | 6 |
| 1 | 2275 | Gravity Falls | 2012 | 7 | 8.9 | 100 | NA | Jason Ritter,Alex Hirsch,Kristen Schaal,Linda ... | Animation,Adventure,Comedy,Drama,Family,Fantas... | United States,Argentina,Australia,United Kingd... | ... | 23 | tv series | 2 | 0 | 1 | 0 | 1 | 1 | Disney+ | 6 |
| 2 | 3450 | Chloe's Closet | 2010 | 0 | 6.8 | NA | NA | Teresa Beausang,Oisín Kearns,Siobhán Ní Thuair... | Animation,Adventure,Comedy,Family,Fantasy | United States,Germany,United Kingdom,Netherlan... | ... | 11 | tv series | 4 | 0 | 1 | 1 | 0 | 1 | Prime Video | 6 |
| 3 | 2490 | Star vs. the Forces of Evil | 2015 | 7 | 8 | NA | NA | Eden Sher,Adam McArthur,Grey Griffin,Daron Nef... | Animation,Action,Adventure,Comedy,Drama,Family... | United States,Spain,United Kingdom,Mexico,Japan | ... | 22 | tv series | 4 | 0 | 1 | 0 | 1 | 1 | Disney+ | 5 |
| 4 | 2406 | Steven Universe | 2013 | 7 | 8.2 | 100 | NA | Zach Callison,Deedee Magno,Michaela Dietz,Este... | Animation,Action,Adventure,Comedy,Drama,Family... | United States,South Korea,Spain,Japan,Mexico | ... | 11 | tv series | 6 | 0 | 1 | 0 | 0 | 1 | Hulu | 5 |
5 rows × 22 columns
# Two horizontal bar charts for Hulu: the first 15 rows of the descending
# ranking, then the first 15 rows of the ascending ranking.
for _ranking, _chart_title in (
    (hulu_countries_most_tvshows, 'TV Shows with Highest Number of Countries : Hulu'),
    (hulu_countries_least_tvshows, 'TV Shows with Lowest Number of Countries : Hulu'),
):
    _first15 = _ranking.head(15)
    fig = px.bar(
        x = _first15['Number of Countries'],
        y = _first15['Title'],
        color = _first15['Number of Countries'],
        color_continuous_scale = 'Teal_r',
        labels = { 'y' : 'TV Shows', 'x' : 'Number of Countries'},
        title = _chart_title,
    )
    fig.update_layout(plot_bgcolor = 'white')
    fig.show()
# Prime Video rows of the overall rankings, keeping the ranking order and
# renumbering from 0 so that row 0 is the platform's extreme entry.
prime_video_countries_most_tvshows = df_countries_most_tvshows[df_countries_most_tvshows['Prime Video'].eq(1)].reset_index(drop = True)
prime_video_countries_least_tvshows = df_countries_least_tvshows[df_countries_least_tvshows['Prime Video'].eq(1)].reset_index(drop = True)
prime_video_countries_most_tvshows.head(5)
| ID | Title | Year | Age | IMDb | Rotten Tomatoes | Directors | Cast | Genres | Country | ... | Runtime | Kind | Seasons | Netflix | Hulu | Prime Video | Disney+ | Type | Service Provider | Number of Countries | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 3833 | Trapped | 2015 | 16 | 8.1 | NA | NA | Ólafur Darri Ólafsson,Ilmur Kristjánsdóttir,In... | Crime,Drama,Mystery,Thriller | Iceland,Denmark,Finland,Sweden,Norway,Germany,... | ... | 60 | tv series | 3 | 0 | 0 | 1 | 0 | 1 | Prime Video | 8 |
| 1 | 345 | Scarlett | 2016 | 13 | 6.5 | NA | NA | Joanne Whalley,Timothy Dalton,Annabeth Gish,Ju... | Drama,Romance | France,United States,Germany,Italy,United King... | ... | 360 | tv series | 1 | 0 | 0 | 1 | 0 | 1 | Prime Video | 7 |
| 2 | 3980 | Titanic: Blood and Steel | 2012 | 16 | 7.3 | NA | NA | Kevin Zegers,Alessandra Mastronardi,Derek Jaco... | Drama,History | Ireland,Italy,France,Canada,United Kingdom,Spa... | ... | 55 | tv series | 1 | 0 | 0 | 1 | 0 | 1 | Prime Video | 7 |
| 3 | 4290 | GetBackers | 2002 | 7 | 7.4 | NA | NA | Darren Pleavin,Shanon Weaver,Jason Liebrecht,O... | Animation,Action,Adventure,Comedy,Crime,Drama,... | Japan,Italy,United Kingdom,Mexico,United State... | ... | 24 | tv series | 1 | 0 | 0 | 1 | 0 | 1 | Prime Video | 6 |
| 4 | 4046 | The Busy World of Richard Scarry | 1994 | 0 | 7.4 | NA | NA | Peter Wildman,Denis Akiyama,Carl Banas,George ... | Animation,Family | Canada,France,Italy,United States,United Kingd... | ... | 30 | tv series | 5 | 0 | 0 | 1 | 0 | 1 | Prime Video | 6 |
5 rows × 22 columns
# Two horizontal bar charts for Prime Video: the first 15 rows of the
# descending ranking, then the first 15 rows of the ascending ranking.
for _ranking, _chart_title in (
    (prime_video_countries_most_tvshows, 'TV Shows with Highest Number of Countries : Prime Video'),
    (prime_video_countries_least_tvshows, 'TV Shows with Lowest Number of Countries : Prime Video'),
):
    _first15 = _ranking.head(15)
    fig = px.bar(
        x = _first15['Number of Countries'],
        y = _first15['Title'],
        color = _first15['Number of Countries'],
        color_continuous_scale = 'Teal_r',
        labels = { 'y' : 'TV Shows', 'x' : 'Number of Countries'},
        title = _chart_title,
    )
    fig.update_layout(plot_bgcolor = 'white')
    fig.show()
# Disney+ rows of the overall rankings, keeping the ranking order and
# renumbering from 0 so that row 0 is the platform's extreme entry.
disney_countries_most_tvshows = df_countries_most_tvshows[df_countries_most_tvshows['Disney+'].eq(1)].reset_index(drop = True)
disney_countries_least_tvshows = df_countries_least_tvshows[df_countries_least_tvshows['Disney+'].eq(1)].reset_index(drop = True)
disney_countries_most_tvshows.head(5)
| ID | Title | Year | Age | IMDb | Rotten Tomatoes | Directors | Cast | Genres | Country | ... | Runtime | Kind | Seasons | Netflix | Hulu | Prime Video | Disney+ | Type | Service Provider | Number of Countries | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5371 | Bonkers | 1993 | 7 | 6.8 | NA | NA | Jim Cummings,Earl Boen,Frank Welker,Jeff Benne... | Animation,Action,Adventure,Comedy,Crime,Family | United States,Hong Kong,South Korea,France,Can... | ... | 30 | tv series | 1 | 0 | 0 | 0 | 1 | 1 | Disney+ | 11 |
| 1 | 2275 | Gravity Falls | 2012 | 7 | 8.9 | 100 | NA | Jason Ritter,Alex Hirsch,Kristen Schaal,Linda ... | Animation,Adventure,Comedy,Drama,Family,Fantas... | United States,Argentina,Australia,United Kingd... | ... | 23 | tv series | 2 | 0 | 1 | 0 | 1 | 1 | Disney+ | 6 |
| 2 | 2490 | Star vs. the Forces of Evil | 2015 | 7 | 8 | NA | NA | Eden Sher,Adam McArthur,Grey Griffin,Daron Nef... | Animation,Action,Adventure,Comedy,Drama,Family... | United States,Spain,United Kingdom,Mexico,Japan | ... | 22 | tv series | 4 | 0 | 1 | 0 | 1 | 1 | Disney+ | 5 |
| 3 | 5356 | Iron Man: Armored Adventures | 2009 | 7 | 6.5 | 60 | NA | Adrian Petriw,Daniel Bacon,Anna Cummer,Lisa An... | Animation,Action,Adventure,Family,Fantasy,Sci-Fi | Canada,United States,United Kingdom,France,Lux... | ... | 22 | tv series | 2 | 0 | 0 | 0 | 1 | 1 | Disney+ | 5 |
| 4 | 3350 | Henry Hugglemonster | 2013 | 0 | 5.2 | NA | NA | Lara Jill Miller,Hynden Walch,Tom Kenny,Kari W... | Animation,Adventure,Family,Fantasy,Music | Ireland,United Kingdom,United States,South Kor... | ... | 22 | tv series | 2 | 0 | 1 | 0 | 1 | 1 | Disney+ | 5 |
5 rows × 22 columns
# Two horizontal bar charts for Disney+: the first 15 rows of the descending
# ranking, then the first 15 rows of the ascending ranking.
for _ranking, _chart_title in (
    (disney_countries_most_tvshows, 'TV Shows with Highest Number of Countries : Disney+'),
    (disney_countries_least_tvshows, 'TV Shows with Lowest Number of Countries : Disney+'),
):
    _first15 = _ranking.head(15)
    fig = px.bar(
        x = _first15['Number of Countries'],
        y = _first15['Title'],
        color = _first15['Number of Countries'],
        color_continuous_scale = 'Teal_r',
        labels = { 'y' : 'TV Shows', 'x' : 'Number of Countries'},
        title = _chart_title,
    )
    fig.update_layout(plot_bgcolor = 'white')
    fig.show()
# For the whole catalogue and for each platform, report the title in row 0 of
# the descending / ascending ranking together with the max / min count.
_extreme_sources = (
    ("Ever Got", df_countries_most_tvshows, df_countries_least_tvshows),
    ("on 'Netflix'", netflix_countries_most_tvshows, netflix_countries_least_tvshows),
    ("on 'Hulu'", hulu_countries_most_tvshows, hulu_countries_least_tvshows),
    ("on 'Prime Video'", prime_video_countries_most_tvshows, prime_video_countries_least_tvshows),
    ("on 'Disney+'", disney_countries_most_tvshows, disney_countries_least_tvshows),
)

_extreme_lines = []
for _scope, _most, _least in _extreme_sources:
    _extreme_lines.append(
        f"The TV Show with Highest Number of Countries {_scope} is "
        f"'{_most['Title'][0]}' : '{_most['Number of Countries'].max()}'"
    )
    _extreme_lines.append(
        f"The TV Show with Lowest Number of Countries {_scope} is "
        f"'{_least['Title'][0]}' : '{_least['Number of Countries'].min()}'"
    )

# Same layout as before: a leading blank line, then every sentence followed
# by an empty line.
print("\n" + "".join(_sentence + "\n\n" for _sentence in _extreme_lines))
The TV Show with Highest Number of Countries Ever Got is 'Bonkers' : '11'
The TV Show with Lowest Number of Countries Ever Got is 'Snowpiercer' : '1'
The TV Show with Highest Number of Countries on 'Netflix' is 'Oggy and the Cockroaches' : '7'
The TV Show with Lowest Number of Countries on 'Netflix' is 'Snowpiercer' : '1'
The TV Show with Highest Number of Countries on 'Hulu' is 'The Amazing World of Gumball' : '6'
The TV Show with Lowest Number of Countries on 'Hulu' is 'BEM' : '1'
The TV Show with Highest Number of Countries on 'Prime Video' is 'Trapped' : '8'
The TV Show with Lowest Number of Countries on 'Prime Video' is 'Murder on the Internet' : '1'
The TV Show with Highest Number of Countries on 'Disney+' is 'Bonkers' : '11'
The TV Show with Lowest Number of Countries on 'Disney+' is 'Lost Treasures of Egypt' : '1'
# Mean 'Number of Countries' (rounded to two decimals) for the whole
# catalogue and for each platform frame.
_average_sources = (
    ("Accross All Platforms the Average Number of Countries is", df_tvshows_count_countries),
    ("The Average Number of Countries on 'Netflix' is", netflix_countries_tvshows),
    ("The Average Number of Countries on 'Hulu' is", hulu_countries_tvshows),
    ("The Average Number of Countries on 'Prime Video' is", prime_video_countries_tvshows),
    ("The Average Number of Countries on 'Disney+' is", disney_countries_tvshows),
)

# Leading blank line, then every sentence followed by an empty line.
print("\n" + "".join(
    f"{_label} '{round(_frame['Number of Countries'].mean(), ndigits = 2)}'\n\n"
    for _label, _frame in _average_sources
))
Accross All Platforms the Average Number of Countries is '1.15'
The Average Number of Countries on 'Netflix' is '1.17'
The Average Number of Countries on 'Hulu' is '1.11'
The Average Number of Countries on 'Prime Video' is '1.16'
The Average Number of Countries on 'Disney+' is '1.32'
# Largest 'Number of Countries' value for the whole catalogue and for each
# platform frame.
# NOTE(review): the printed wording says "Total Count of Country" but the
# value is the per-show maximum, not a count of distinct countries - confirm
# which one is intended.
_maximum_sources = (
    ("Accross All Platforms Total Count of Country is", df_tvshows_count_countries),
    ("Total Count of Country on 'Netflix' is", netflix_countries_tvshows),
    ("Total Count of Country on 'Hulu' is", hulu_countries_tvshows),
    ("Total Count of Country on 'Prime Video' is", prime_video_countries_tvshows),
    ("Total Count of Country on 'Disney+' is", disney_countries_tvshows),
)

# Leading blank line, then every sentence followed by an empty line.
print("\n" + "".join(
    f"{_label} '{_frame['Number of Countries'].max()}'\n\n"
    for _label, _frame in _maximum_sources
))
Accross All Platforms Total Count of Country is '11'
Total Count of Country on 'Netflix' is '7'
Total Count of Country on 'Hulu' is '6'
Total Count of Country on 'Prime Video' is '8'
Total Count of Country on 'Disney+' is '11'
# Distribution of 'Number of Countries' over all shows: histogram with a KDE
# overlay on the left, box plot on the right.
f, ax = plt.subplots(1, 2 , figsize = (20, 5))

# histplot replaces the deprecated distplot (and matches the histplot calls
# used for the per-platform chart). stat = 'density' keeps the normalised
# y-axis that distplot produced when a KDE was drawn.
sns.histplot(df_tvshows_count_countries['Number of Countries'], bins = 20, kde = True, stat = 'density', ax = ax[0])

# Pass the data by keyword: positional data arguments are deprecated in seaborn.
sns.boxplot(x = df_tvshows_count_countries['Number of Countries'], ax = ax[1])
plt.show()
# Overlaid histograms (with KDE) of 'Number of Countries', one per platform.
plt.figure(figsize = (20, 5))
plt.title('Number of Countries s Per Platform')

# Each histogram carries its own label so that the legend entry is bound to
# the artist of the right colour. Passing a bare list of names to
# plt.legend() assigns them to whichever artists happen to come first on the
# axes, which can mislabel the platforms.
for _frame, _platform, _colour in (
    (prime_video_countries_tvshows, 'Prime Video', 'lightblue'),
    (netflix_countries_tvshows, 'Netflix', 'red'),
    (hulu_countries_tvshows, 'Hulu', 'lightgreen'),
    (disney_countries_tvshows, 'Disney+', 'darkblue'),
):
    sns.histplot(_frame['Number of Countries'], color = _colour, kde = True, label = _platform)

# Build the legend from the labels attached above
plt.legend()
plt.show()
# Expand the comma-separated 'Country' strings into one value per row.
# explode() repeats the original row label for every list element, which is
# what apply(pd.Series).stack() + droplevel produced, without building a wide
# intermediate frame row by row.
df_lan = df_tvshows_country['Country'].str.split(',').explode()
df_lan.name = 'Country'

# Replace the original column by the expanded one; joining on the index
# duplicates each show once per country, and 'Country' ends up as last column.
df_tvshows_country = df_tvshows_country.drop(columns = 'Country').join(df_lan)
df_tvshows_country.drop_duplicates(inplace = True)
df_tvshows_country.head(5)
| ID | Title | Year | Age | IMDb | Rotten Tomatoes | Directors | Cast | Genres | Language | ... | Runtime | Kind | Seasons | Netflix | Hulu | Prime Video | Disney+ | Type | Service Provider | Country | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Snowpiercer | 2013 | 18 | 6.9 | 94 | NA | Daveed Diggs,Iddo Goldberg,Mickey Sumner,Aliso... | Action,Drama,Sci-Fi,Thriller | English | ... | 60 | tv series | 3 | 1 | 0 | 0 | 0 | 1 | Netflix | United States |
| 1 | 2 | Philadelphia | 1993 | 13 | 8.8 | 80 | NA | Charlie Day,Glenn Howerton,Rob McElhenney,Kait... | Comedy | English | ... | 22 | tv series | 18 | 1 | 0 | 0 | 0 | 1 | Netflix | United States |
| 2 | 3 | Roma | 2018 | 18 | 8.7 | 93 | NA | Kevin McKidd,Ray Stevenson,Polly Walker,Kerry ... | Action,Drama,History,Romance,War | English | ... | 52 | tv series | 2 | 1 | 0 | 0 | 0 | 1 | Netflix | United Kingdom |
| 2 | 3 | Roma | 2018 | 18 | 8.7 | 93 | NA | Kevin McKidd,Ray Stevenson,Polly Walker,Kerry ... | Action,Drama,History,Romance,War | English | ... | 52 | tv series | 2 | 1 | 0 | 0 | 0 | 1 | Netflix | United States |
| 3 | 4 | Amy | 2015 | 18 | 7 | 87 | NA | Amy Brenneman,Richard T. Jones,Jessica Tuck,Ma... | Drama | English | ... | 60 | tv series | 6 | 1 | 0 | 1 | 1 | 1 | Netflix | United States |
5 rows × 21 columns
# Per-country aggregates of the (show, country) rows: number of titles and
# the sum of each platform flag.
_rows_by_country = df_tvshows_country.groupby('Country')
country_count = _rows_by_country['Title'].count()
country_tvshows = _rows_by_country[['Netflix', 'Hulu', 'Prime Video', 'Disney+']].sum()

# One table with 'Country', 'TV Shows Count' and the four platform sums,
# ordered by the overall count (largest first).
country_data_tvshows = (
    pd.concat([country_count, country_tvshows], axis = 1)
    .reset_index()
    .rename(columns = {'Title' : 'TV Shows Count'})
    .sort_values(by = 'TV Shows Count', ascending = False)
)


def _country_table_for(platform):
    """Return 'Country' and `platform` columns for countries with a non-zero
    `platform` sum, sorted by that sum (descending) and renumbered from 0."""
    has_titles = country_data_tvshows[platform].ne(0)
    return (
        country_data_tvshows.loc[has_titles, ['Country', platform]]
        .sort_values(by = platform, ascending = False)
        .reset_index(drop = True)
    )


# Distinct per-platform tables restricted to the countries present on each platform
netflix_country_tvshows = _country_table_for('Netflix')
hulu_country_tvshows = _country_table_for('Hulu')
prime_video_country_tvshows = _country_table_for('Prime Video')
disney_country_tvshows = _country_table_for('Disney+')

# Country with TV Shows Counts - All Platforms Combined
country_data_tvshows.sort_values(by = 'TV Shows Count', ascending = False).head(10)
| Country | TV Shows Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 69 | United States | 2705 | 729 | 1025 | 995 | 164 |
| 68 | United Kingdom | 771 | 284 | 145 | 385 | 13 |
| 32 | Japan | 411 | 147 | 231 | 94 | 3 |
| 8 | Canada | 378 | 139 | 77 | 182 | 10 |
| 57 | South Korea | 166 | 111 | 22 | 44 | 7 |
| 1 | Australia | 144 | 69 | 32 | 53 | 2 |
| 19 | France | 138 | 69 | 20 | 57 | 6 |
| 59 | Spain | 76 | 50 | 9 | 22 | 2 |
| 41 | Mexico | 75 | 49 | 15 | 12 | 2 |
| 20 | Germany | 71 | 32 | 7 | 32 | 1 |
# Vertical bar chart of the first 50 rows of the country table.
_first50 = country_data_tvshows.head(50)
fig = px.bar(
    x = _first50['Country'],
    y = _first50['TV Shows Count'],
    color = _first50['TV Shows Count'],
    color_continuous_scale = 'Teal_r',
    labels = { 'x' : 'Country', 'y' : 'TV Shows Count'},
    title = 'Major Countries : All Platforms',
)
fig.update_layout(plot_bgcolor = 'white')
fig.show()

# World map coloured by the overall count; countries are matched by name.
fig = px.choropleth(
    data_frame = country_data_tvshows,
    locations = 'Country',
    locationmode = 'country names',
    color = 'TV Shows Count',
    color_continuous_scale = 'deep',
)
fig.show()
# Country table ordered by 'TV Shows Count' (largest first), renumbered from 0.
df_country_high_tvshows = (
    country_data_tvshows
    .sort_values(by = 'TV Shows Count', ascending = False)
    .reset_index(drop = True)
)

print('\nCountry with Highest Ever TV Shows Count are : All Platforms Combined\n')
df_country_high_tvshows.head(5)
Country with Highest Ever TV Shows Count are : All Platforms Combined
| Country | TV Shows Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 0 | United States | 2705 | 729 | 1025 | 995 | 164 |
| 1 | United Kingdom | 771 | 284 | 145 | 385 | 13 |
| 2 | Japan | 411 | 147 | 231 | 94 | 3 |
| 3 | Canada | 378 | 139 | 77 | 182 | 10 |
| 4 | South Korea | 166 | 111 | 22 | 44 | 7 |
# Horizontal bar chart of the first 15 rows of the descending country ranking.
_leading15 = df_country_high_tvshows.head(15)
fig = px.bar(
    x = _leading15['TV Shows Count'],
    y = _leading15['Country'],
    color = _leading15['TV Shows Count'],
    color_continuous_scale = 'Teal_r',
    labels = { 'y' : 'Country', 'x' : 'TV Shows Count'},
    title = 'Country with Highest TV Shows : All Platforms',
)
fig.update_layout(plot_bgcolor = 'white')
fig.show()
# Country table ordered by 'TV Shows Count' (smallest first), renumbered from 0.
df_country_low_tvshows = (
    country_data_tvshows
    .sort_values(by = 'TV Shows Count', ascending = True)
    .reset_index(drop = True)
)

print('\nCountry with Lowest Ever TV Shows Count are : All Platforms Combined\n')
df_country_low_tvshows.head(5)
Country with Lowest Ever TV Shows Count are : All Platforms Combined
| Country | TV Shows Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 0 | Lithuania | 1 | 0 | 0 | 1 | 0 |
| 1 | Serbia | 1 | 0 | 0 | 1 | 0 |
| 2 | Korea | 1 | 1 | 0 | 0 | 0 |
| 3 | Vietnam | 1 | 1 | 0 | 0 | 0 |
| 4 | Venezuela | 1 | 0 | 1 | 0 | 0 |
# Horizontal bar chart of the first 15 rows of the ascending country ranking.
_trailing15 = df_country_low_tvshows.head(15)
fig = px.bar(
    x = _trailing15['TV Shows Count'],
    y = _trailing15['Country'],
    color = _trailing15['TV Shows Count'],
    color_continuous_scale = 'Teal_r',
    labels = { 'y' : 'Country', 'x' : 'TV Shows Count'},
    title = 'Country with Lowest TV Shows Count : All Platforms',
)
fig.update_layout(plot_bgcolor = 'white')
fig.show()
# Values reported below: number of distinct countries, the first five
# countries of the descending ranking, and row 0 of each ranking with the
# overall max / min count.
_country_total = country_data_tvshows['Country'].unique().shape[0]
_leading_countries = country_data_tvshows.sort_values(by = 'TV Shows Count', ascending = False)['Country'].unique()[:5]
_high_country = df_country_high_tvshows['Country'][0]
_high_count = df_country_high_tvshows['TV Shows Count'].max()
_low_country = df_country_low_tvshows['Country'][0]
_low_count = df_country_low_tvshows['TV Shows Count'].min()

print(f'''
Total '{_country_total}' unique Country Count s were Given, They were Like this,\n
{_leading_countries}\n
The Highest Ever TV Shows Count Ever Any TV Show Got is '{_high_country}' : '{_high_count}'\n
The Lowest Ever TV Shows Count Ever Any TV Show Got is '{_low_country}' : '{_low_count}'\n
''')
Total '74' unique Country Count s were Given, They were Like this,
['United States' 'United Kingdom' 'Japan' 'Canada' 'South Korea']
The Highest Ever TV Shows Count Ever Any TV Show Got is 'United States' : '2705'
The Lowest Ever TV Shows Count Ever Any TV Show Got is 'Lithuania' : '1'
# Pie chart of 'TV Shows Count' for the first ten rows of the country table,
# with percentage and country name written inside each slice.
_first10 = country_data_tvshows.head(10)
fig = px.pie(
    _first10,
    names = 'Country',
    values = 'TV Shows Count',
    color_discrete_sequence = px.colors.sequential.Teal,
)
fig.update_traces(
    textposition = 'inside',
    textinfo = 'percent+label',
    title = 'TV Shows Count based on Country',
)
fig.show()
# netflix_country_tvshows = country_data_tvshows[country_data_tvshows['Netflix'] != 0].sort_values(by = 'Netflix', ascending = False).reset_index()
# netflix_country_tvshows = netflix_country_tvshows.drop(['index', 'Hulu', 'Prime Video', 'Disney+', 'TV Shows Count'], axis = 1)
# Rank every country by its Netflix count, once descending and once ascending,
# renumbering the rows from 0 in both results.
netflix_country_high_tvshows = df_country_high_tvshows.sort_values(by = 'Netflix', ascending = False).reset_index(drop = True)
netflix_country_low_tvshows = df_country_high_tvshows.sort_values(by = 'Netflix', ascending = True).reset_index(drop = True)
netflix_country_high_tvshows.head(5)
| Country | TV Shows Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 0 | United States | 2705 | 729 | 1025 | 995 | 164 |
| 1 | United Kingdom | 771 | 284 | 145 | 385 | 13 |
| 2 | Japan | 411 | 147 | 231 | 94 | 3 |
| 3 | Canada | 378 | 139 | 77 | 182 | 10 |
| 4 | South Korea | 166 | 111 | 22 | 44 | 7 |
# Vertical bar chart of the 15 countries with the most TV shows on Netflix.
fig = px.bar(x = netflix_country_high_tvshows['Country'][:15],
             y = netflix_country_high_tvshows['Netflix'][:15],
             color = netflix_country_high_tvshows['Netflix'][:15],
             color_continuous_scale = 'Teal_r',
             # x carries the countries and y the counts, so the axis titles must
             # follow that orientation (they were previously swapped).
             labels = { 'x' : 'Country', 'y' : 'TV Shows Count'},
             title = 'Country with Highest TV Shows : Netflix')
fig.update_layout(plot_bgcolor = 'white')
fig.show()
# World map shaded by the 'Netflix' column of netflix_country_tvshows,
# matching rows to countries by name.
fig = px.choropleth(
    data_frame = netflix_country_tvshows,
    locations = 'Country',
    locationmode = 'country names',
    color = 'Netflix',
    color_continuous_scale = 'Reds',
)
fig.show()
# hulu_country_tvshows = country_data_tvshows[country_data_tvshows['Hulu'] != 0].sort_values(by = 'Hulu', ascending = False).reset_index()
# hulu_country_tvshows = hulu_country_tvshows.drop(['index', 'Netflix', 'Prime Video', 'Disney+', 'TV Shows Count'], axis = 1)
# Rank every country by its Hulu count, once descending and once ascending,
# renumbering the rows from 0 in both results.
hulu_country_high_tvshows = df_country_high_tvshows.sort_values(by = 'Hulu', ascending = False).reset_index(drop = True)
hulu_country_low_tvshows = df_country_high_tvshows.sort_values(by = 'Hulu', ascending = True).reset_index(drop = True)
hulu_country_high_tvshows.head(5)
| Country | TV Shows Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 0 | United States | 2705 | 729 | 1025 | 995 | 164 |
| 1 | Japan | 411 | 147 | 231 | 94 | 3 |
| 2 | United Kingdom | 771 | 284 | 145 | 385 | 13 |
| 3 | Canada | 378 | 139 | 77 | 182 | 10 |
| 4 | Australia | 144 | 69 | 32 | 53 | 2 |
# Vertical bar chart of the 15 countries with the most TV shows on Hulu.
fig = px.bar(x = hulu_country_high_tvshows['Country'][:15],
             y = hulu_country_high_tvshows['Hulu'][:15],
             color = hulu_country_high_tvshows['Hulu'][:15],
             color_continuous_scale = 'Teal_r',
             # x carries the countries and y the counts, so the axis titles must
             # follow that orientation (they were previously swapped).
             labels = { 'x' : 'Country', 'y' : 'TV Shows Count'},
             title = 'Country with Highest TV Shows : Hulu')
fig.update_layout(plot_bgcolor = 'white')
fig.show()
# World map shaded by the 'Hulu' column of hulu_country_tvshows,
# matching rows to countries by name.
fig = px.choropleth(
    data_frame = hulu_country_tvshows,
    locations = 'Country',
    locationmode = 'country names',
    color = 'Hulu',
    color_continuous_scale = 'Greens',
)
fig.show()
# prime_video_country_tvshows = country_data_tvshows[country_data_tvshows['Prime Video'] != 0].sort_values(by = 'Prime Video', ascending = False).reset_index()
# prime_video_country_tvshows = prime_video_country_tvshows.drop(['index', 'Netflix', 'Hulu', 'Disney+', 'TV Shows Count'], axis = 1)
# Rank every country by its Prime Video count, once descending and once ascending,
# renumbering the rows from 0 in both results.
prime_video_country_high_tvshows = df_country_high_tvshows.sort_values(by = 'Prime Video', ascending = False).reset_index(drop = True)
prime_video_country_low_tvshows = df_country_high_tvshows.sort_values(by = 'Prime Video', ascending = True).reset_index(drop = True)
prime_video_country_high_tvshows.head(5)
| Country | TV Shows Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 0 | United States | 2705 | 729 | 1025 | 995 | 164 |
| 1 | United Kingdom | 771 | 284 | 145 | 385 | 13 |
| 2 | Canada | 378 | 139 | 77 | 182 | 10 |
| 3 | Japan | 411 | 147 | 231 | 94 | 3 |
| 4 | France | 138 | 69 | 20 | 57 | 6 |
# Vertical bar chart of the 15 countries with the most TV shows on Prime Video.
fig = px.bar(x = prime_video_country_high_tvshows['Country'][:15],
             y = prime_video_country_high_tvshows['Prime Video'][:15],
             color = prime_video_country_high_tvshows['Prime Video'][:15],
             color_continuous_scale = 'Teal_r',
             # x carries the countries and y the counts, so the axis titles must
             # follow that orientation (they were previously swapped).
             labels = { 'x' : 'Country', 'y' : 'TV Shows Count'},
             title = 'Country with Highest TV Shows : Prime Video')
fig.update_layout(plot_bgcolor = 'white')
fig.show()
# World map shaded by the 'Prime Video' column of prime_video_country_tvshows,
# matching rows to countries by name.
fig = px.choropleth(
    data_frame = prime_video_country_tvshows,
    locations = 'Country',
    locationmode = 'country names',
    color = 'Prime Video',
    color_continuous_scale = 'Blues',
)
fig.show()
# disney_country_tvshows = country_data_tvshows[country_data_tvshows['Disney+'] != 0].sort_values(by = 'Disney+', ascending = False).reset_index()
# disney_country_tvshows = disney_country_tvshows.drop(['index', 'Netflix', 'Hulu', 'Prime Video', 'TV Shows Count'], axis = 1)
# Rank every country by its Disney+ count, once descending and once ascending,
# renumbering the rows from 0 in both results.
disney_country_high_tvshows = df_country_high_tvshows.sort_values(by = 'Disney+', ascending = False).reset_index(drop = True)
disney_country_low_tvshows = df_country_high_tvshows.sort_values(by = 'Disney+', ascending = True).reset_index(drop = True)
disney_country_high_tvshows.head(5)
| Country | TV Shows Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 0 | United States | 2705 | 729 | 1025 | 995 | 164 |
| 1 | United Kingdom | 771 | 284 | 145 | 385 | 13 |
| 2 | Canada | 378 | 139 | 77 | 182 | 10 |
| 3 | South Korea | 166 | 111 | 22 | 44 | 7 |
| 4 | France | 138 | 69 | 20 | 57 | 6 |
# Vertical bar chart of the 15 countries with the most TV shows on Disney+.
fig = px.bar(x = disney_country_high_tvshows['Country'][:15],
             y = disney_country_high_tvshows['Disney+'][:15],
             color = disney_country_high_tvshows['Disney+'][:15],
             color_continuous_scale = 'Teal_r',
             # x carries the countries and y the counts, so the axis titles must
             # follow that orientation (they were previously swapped).
             labels = { 'x' : 'Country', 'y' : 'TV Shows Count'},
             title = 'Country with Highest TV Shows : Disney+')
fig.update_layout(plot_bgcolor = 'white')
fig.show()
# World map shaded by the 'Disney+' column of disney_country_tvshows,
# matching rows to countries by name.
fig = px.choropleth(
    data_frame = disney_country_tvshows,
    locations = 'Country',
    locationmode = 'country names',
    color = 'Disney+',
    color_continuous_scale = 'BuPu',
)
fig.show()
# Distribution of the per-country TV show count: histogram with a KDE curve on
# the left, box plot on the right.
f, ax = plt.subplots(1, 2 , figsize = (20, 5))
# sns.distplot is deprecated; histplot with a density scale and cut = 3 is the
# replacement seaborn documents for it (histplot is already used in this file).
sns.histplot(x = country_data_tvshows['TV Shows Count'], bins = 20, kde = True, stat = 'density', kde_kws = dict(cut = 3), ax = ax[0])
# Name the axis explicitly: a positional Series is interpreted differently
# across seaborn versions.
sns.boxplot(x = country_data_tvshows['TV Shows Count'], ax = ax[1])
plt.show()
# One shared figure: a histogram with KDE curve per platform, drawn from the
# first 50 rows of each per-platform frame.
plt.figure(figsize = (20, 5))
plt.title('Country TV Shows Count Per Platform')
# Drawing order is kept because the legend entries below rely on it.
platform_layers = [
    (disney_country_tvshows, 'Disney+', 'darkblue'),
    (prime_video_country_tvshows, 'Prime Video', 'lightblue'),
    (netflix_country_tvshows, 'Netflix', 'red'),
    (hulu_country_tvshows, 'Hulu', 'lightgreen'),
]
for platform_frame, platform_name, platform_color in platform_layers:
    sns.histplot(platform_frame[platform_name][:50], color = platform_color, legend = True, kde = True)
# Legend entries in drawing order
plt.legend([layer[1] for layer in platform_layers])
plt.show()
# Summary of the extremes: for all platforms combined and for each platform,
# print the country in row 0 of the descending ("high") and ascending ("low")
# frames together with the max / min of the matching count column.
# NOTE(review): the per-platform "low" frames still contain countries with a
# count of 0, so the reported minimum is 0 whenever such a country exists.
print(f'''
The Country with Highest TV Shows Count Ever Got is '{df_country_high_tvshows['Country'][0]}' : '{df_country_high_tvshows['TV Shows Count'].max()}'\n
The Country with Lowest TV Shows Count Ever Got is '{df_country_low_tvshows['Country'][0]}' : '{df_country_low_tvshows['TV Shows Count'].min()}'\n
The Country with Highest TV Shows Count on 'Netflix' is '{netflix_country_high_tvshows['Country'][0]}' : '{netflix_country_high_tvshows['Netflix'].max()}'\n
The Country with Lowest TV Shows Count on 'Netflix' is '{netflix_country_low_tvshows['Country'][0]}' : '{netflix_country_low_tvshows['Netflix'].min()}'\n
The Country with Highest TV Shows Count on 'Hulu' is '{hulu_country_high_tvshows['Country'][0]}' : '{hulu_country_high_tvshows['Hulu'].max()}'\n
The Country with Lowest TV Shows Count on 'Hulu' is '{hulu_country_low_tvshows['Country'][0]}' : '{hulu_country_low_tvshows['Hulu'].min()}'\n
The Country with Highest TV Shows Count on 'Prime Video' is '{prime_video_country_high_tvshows['Country'][0]}' : '{prime_video_country_high_tvshows['Prime Video'].max()}'\n
The Country with Lowest TV Shows Count on 'Prime Video' is '{prime_video_country_low_tvshows['Country'][0]}' : '{prime_video_country_low_tvshows['Prime Video'].min()}'\n
The Country with Highest TV Shows Count on 'Disney+' is '{disney_country_high_tvshows['Country'][0]}' : '{disney_country_high_tvshows['Disney+'].max()}'\n
The Country with Lowest TV Shows Count on 'Disney+' is '{disney_country_low_tvshows['Country'][0]}' : '{disney_country_low_tvshows['Disney+'].min()}'\n
''')
The Country with Highest TV Shows Count Ever Got is 'United States' : '2705'
The Country with Lowest TV Shows Count Ever Got is 'Lithuania' : '1'
The Country with Highest TV Shows Count on 'Netflix' is 'United States' : '729'
The Country with Lowest TV Shows Count on 'Netflix' is 'Lithuania' : '0'
The Country with Highest TV Shows Count on 'Hulu' is 'United States' : '1025'
The Country with Lowest TV Shows Count on 'Hulu' is 'Lithuania' : '0'
The Country with Highest TV Shows Count on 'Prime Video' is 'United States' : '995'
The Country with Lowest TV Shows Count on 'Prime Video' is 'Korea' : '0'
The Country with Highest TV Shows Count on 'Disney+' is 'United States' : '164'
The Country with Lowest TV Shows Count on 'Disney+' is 'Chile' : '0'
# Distribution of the per-country TV show count, all platforms combined
# (first 100 rows of country_data_tvshows).
plt.figure(figsize = (20, 5))
plt.title('Country with TV Shows Count for All Platforms')
# kde / shade are histplot / kdeplot options and a single violin has nothing to
# put in a legend; violinplot does not define them, so they are dropped instead
# of being passed through as stray keyword arguments.
sns.violinplot(x = country_data_tvshows['TV Shows Count'][:100], color = 'gold')
plt.show()
# Violin plot of the per-country count for each platform, laid out as two
# figures of two panels each (first 100 rows of every per-platform frame).
upper_pair = ((netflix_country_tvshows, 'Netflix', 'red'), (hulu_country_tvshows, 'Hulu', 'lightgreen'))
lower_pair = ((prime_video_country_tvshows, 'Prime Video', 'lightblue'), (disney_country_tvshows, 'Disney+', 'darkblue'))
f1, ax1 = plt.subplots(1, 2 , figsize = (20, 5))
for panel, (platform_frame, platform_name, platform_color) in zip(ax1, upper_pair):
    sns.violinplot(x = platform_frame[platform_name][:100], color = platform_color, ax = panel)
f2, ax2 = plt.subplots(1, 2 , figsize = (20, 5))
for panel, (platform_frame, platform_name, platform_color) in zip(ax2, lower_pair):
    sns.violinplot(x = platform_frame[platform_name][:100], color = platform_color, ax = panel)
plt.show()
# Mean TV show count per country, overall and per platform, rounded to two
# decimals before being printed.
overall_country_mean = round(country_data_tvshows['TV Shows Count'].mean(), ndigits = 2)
netflix_country_mean = round(netflix_country_tvshows['Netflix'].mean(), ndigits = 2)
hulu_country_mean = round(hulu_country_tvshows['Hulu'].mean(), ndigits = 2)
prime_video_country_mean = round(prime_video_country_tvshows['Prime Video'].mean(), ndigits = 2)
disney_country_mean = round(disney_country_tvshows['Disney+'].mean(), ndigits = 2)
print(f'''
Accross All Platforms the Average TV Shows Count of Country is '{overall_country_mean}'\n
The Average TV Shows Count of Country on 'Netflix' is '{netflix_country_mean}'\n
The Average TV Shows Count of Country on 'Hulu' is '{hulu_country_mean}'\n
The Average TV Shows Count of Country on 'Prime Video' is '{prime_video_country_mean}'\n
The Average TV Shows Count of Country on 'Disney+' is '{disney_country_mean}'\n
''')
Accross All Platforms the Average TV Shows Count of Country is '75.93'
The Average TV Shows Count of Country on 'Netflix' is '33.11'
The Average TV Shows Count of Country on 'Hulu' is '46.03'
The Average TV Shows Count of Country on 'Prime Video' is '38.65'
The Average TV Shows Count of Country on 'Disney+' is '11.4'
# Number of distinct countries, overall and in each per-platform frame.
overall_country_total = country_data_tvshows['Country'].unique().shape[0]
netflix_country_total = netflix_country_tvshows['Country'].unique().shape[0]
hulu_country_total = hulu_country_tvshows['Country'].unique().shape[0]
prime_video_country_total = prime_video_country_tvshows['Country'].unique().shape[0]
disney_country_total = disney_country_tvshows['Country'].unique().shape[0]
print(f'''
Accross All Platforms Total Count of Country is '{overall_country_total}'\n
Total Count of Country on 'Netflix' is '{netflix_country_total}'\n
Total Count of Country on 'Hulu' is '{hulu_country_total}'\n
Total Count of Country on 'Prime Video' is '{prime_video_country_total}'\n
Total Count of Country on 'Disney+' is '{disney_country_total}'\n
''')
Accross All Platforms Total Count of Country is '74'
Total Count of Country on 'Netflix' is '62'
Total Count of Country on 'Hulu' is '36'
Total Count of Country on 'Prime Video' is '55'
Total Count of Country on 'Disney+' is '20'
# One line per platform over the first ten rows of country_data_tvshows,
# all drawn on the same axes.
plt.figure(figsize = (20, 5))
first_ten_rows = country_data_tvshows[:10]
for platform_name, platform_color in (('Netflix', 'red'),
                                      ('Hulu', 'lightgreen'),
                                      ('Prime Video', 'lightblue'),
                                      ('Disney+', 'darkblue')):
    sns.lineplot(x = first_ten_rows['Country'], y = first_ten_rows[platform_name], color = platform_color)
plt.xlabel('Country', fontsize = 20)
plt.ylabel('TV Shows Count', fontsize = 20)
plt.show()
# 2 x 2 grid of line plots (count on x, country on y), one panel per platform,
# built from the first ten rows of country_data_tvshows.
fig, axes = plt.subplots(2, 2, figsize = (20 , 10))
grid_rows = country_data_tvshows[:10]
grid_countries = grid_rows['Country']
n_co_ax1 = sns.lineplot(x = grid_rows['Netflix'], y = grid_countries, color = 'red', ax = axes[0, 0])
h_co_ax2 = sns.lineplot(x = grid_rows['Hulu'], y = grid_countries, color = 'lightgreen', ax = axes[0, 1])
p_co_ax3 = sns.lineplot(x = grid_rows['Prime Video'], y = grid_countries, color = 'lightblue', ax = axes[1, 0])
d_co_ax4 = sns.lineplot(x = grid_rows['Disney+'], y = grid_countries, color = 'darkblue', ax = axes[1, 1])
# Title each panel with its platform name.
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
for panel, panel_title in zip((n_co_ax1, h_co_ax2, p_co_ax3, d_co_ax4), labels):
    panel.title.set_text(panel_title)
plt.show()
# 2 x 2 grid of horizontal bar charts: the first ten rows of each per-platform
# frame, one panel per platform.
fig, axes = plt.subplots(2, 2, figsize = (20 , 20))
netflix_first_ten = netflix_country_tvshows[:10]
hulu_first_ten = hulu_country_tvshows[:10]
prime_video_first_ten = prime_video_country_tvshows[:10]
disney_first_ten = disney_country_tvshows[:10]
n_co_ax1 = sns.barplot(x = netflix_first_ten['Netflix'], y = netflix_first_ten['Country'], palette = 'Reds_r', ax = axes[0, 0])
h_co_ax2 = sns.barplot(x = hulu_first_ten['Hulu'], y = hulu_first_ten['Country'], palette = 'Greens_r', ax = axes[0, 1])
p_co_ax3 = sns.barplot(x = prime_video_first_ten['Prime Video'], y = prime_video_first_ten['Country'], palette = 'Blues_r', ax = axes[1, 0])
d_co_ax4 = sns.barplot(x = disney_first_ten['Disney+'], y = disney_first_ten['Country'], palette = 'BuPu_r', ax = axes[1, 1])
# Title each panel with its platform name.
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
for panel, panel_title in zip((n_co_ax1, h_co_ax2, p_co_ax3, d_co_ax4), labels):
    panel.title.set_text(panel_title)
plt.show()
# Kernel density estimate of the count column for each platform, drawn on one
# shared figure from the first ten rows of every per-platform frame.
plt.figure(figsize = (20, 5))
plt.title('Country TV Shows Count Per Platform')
density_layers = [
    (netflix_country_tvshows, 'Netflix', 'red'),
    (hulu_country_tvshows, 'Hulu', 'green'),
    (prime_video_country_tvshows, 'Prime Video', 'lightblue'),
    (disney_country_tvshows, 'Disney+', 'darkblue'),
]
for platform_frame, platform_name, platform_color in density_layers:
    sns.kdeplot(platform_frame[platform_name][:10], color = platform_color, legend = True)
# Legend entries in drawing order
plt.legend([layer[1] for layer in density_layers])
plt.show()
# 2 x 2 grid of horizontal bar charts: the same first ten rows of
# country_data_tvshows in every panel, one platform column per panel.
fig, axes = plt.subplots(2, 2, figsize = (20 , 20))
shared_rows = country_data_tvshows[:10]
shared_countries = shared_rows['Country']
n_co_ax1 = sns.barplot(x = shared_rows['Netflix'], y = shared_countries, palette = 'Reds_r', ax = axes[0, 0])
h_co_ax2 = sns.barplot(x = shared_rows['Hulu'], y = shared_countries, palette = 'Greens_r', ax = axes[0, 1])
p_co_ax3 = sns.barplot(x = shared_rows['Prime Video'], y = shared_countries, palette = 'Blues_r', ax = axes[1, 0])
d_co_ax4 = sns.barplot(x = shared_rows['Disney+'], y = shared_countries, palette = 'BuPu_r', ax = axes[1, 1])
# Title each panel with its platform name.
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
for panel, panel_title in zip((n_co_ax1, h_co_ax2, p_co_ax3, d_co_ax4), labels):
    panel.title.set_text(panel_title)
plt.show()
# Drop, in place, the rows whose Country is the "NA" placeholder ...
rows_without_country = df_tvshows_mixed_countries.index[df_tvshows_mixed_countries['Country'] == "NA"]
df_tvshows_mixed_countries.drop(rows_without_country, inplace = True)
# df_tvshows_mixed_countries = df_tvshows_mixed_countries[df_tvshows_mixed_countries.Country != "NA"]
# ... and then the rows that list exactly one country.
single_country_rows = df_tvshows_mixed_countries.index[df_tvshows_mixed_countries['Number of Countries'] == 1]
df_tvshows_mixed_countries.drop(single_country_rows, inplace = True)
df_tvshows_mixed_countries.head(5)
| ID | Title | Year | Age | IMDb | Rotten Tomatoes | Directors | Cast | Genres | Country | ... | Runtime | Kind | Seasons | Netflix | Hulu | Prime Video | Disney+ | Type | Service Provider | Number of Countries | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 3 | Roma | 2018 | 18 | 8.7 | 93 | NA | Kevin McKidd,Ray Stevenson,Polly Walker,Kerry ... | Action,Drama,History,Romance,War | United Kingdom,United States | ... | 52 | tv series | 2 | 1 | 0 | 0 | 0 | 1 | Netflix | 2 |
| 4 | 5 | The Young Offenders | 2016 | NR | 8 | 100 | NA | Alex Murphy,Chris Walley,Hilary Rose,Dominic M... | Comedy | United Kingdom,Ireland | ... | 30 | tv series | 3 | 1 | 0 | 0 | 0 | 1 | Netflix | 2 |
| 23 | 24 | La tribu | 2018 | 18 | 7.6 | 71 | NA | Caleb Ross,Victoria Spence,Meryl Cassie,Antoni... | Drama,Romance,Sci-Fi | New Zealand,United Kingdom | ... | 30 | tv series | 5 | 1 | 0 | 0 | 0 | 1 | Netflix | 2 |
| 38 | 39 | Heroine | 2012 | NR | 7.4 | 50 | NA | Eliza Dushku,Shawn Reaves,Zach Galifianakis,A.... | Drama,Fantasy,Mystery,Thriller | United States,Canada | ... | 43 | tv series | 2 | 1 | 0 | 0 | 0 | 1 | Netflix | 2 |
| 51 | 52 | Abominable Christmas | 2012 | NR | 5.3 | NA | Chad Van De Keere | Ariel Winter,Drake Bell,Emilio Estevez,Isabell... | Animation,Short,Adventure,Comedy,Family | United States,India,Canada,Sri Lanka,South Afr... | ... | 43 | tv series | NA | 1 | 0 | 0 | 0 | 1 | Netflix | 6 |
5 rows × 22 columns
# Aggregate per country combination: number of titles plus the per-platform
# sums, joined side by side and sorted by the title count (largest first).
titles_by_combination = df_tvshows_mixed_countries.groupby('Country')
mixed_countries_count = titles_by_combination['Title'].count()
mixed_countries_tvshows = titles_by_combination[['Netflix', 'Hulu', 'Prime Video', 'Disney+']].sum()
combined_counts = pd.concat([mixed_countries_count, mixed_countries_tvshows], axis = 1).reset_index()
combined_counts = combined_counts.rename(columns = {'Title' : 'TV Shows Count', 'Country' : 'Mixed Country'})
mixed_countries_data_tvshows = combined_counts.sort_values(by = 'TV Shows Count', ascending = False)
mixed_countries_data_tvshows.head(5)
| Mixed Country | TV Shows Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 178 | United States,Canada | 53 | 20 | 13 | 20 | 5 |
| 168 | United Kingdom,United States | 47 | 8 | 10 | 31 | 1 |
| 25 | Canada,United States | 42 | 14 | 14 | 19 | 1 |
| 227 | United States,United Kingdom | 25 | 7 | 6 | 14 | 0 |
| 155 | United Kingdom,Ireland | 8 | 3 | 1 | 4 | 0 |
# Ten country combinations with the most TV shows, all platforms combined
mixed_countries_data_tvshows.sort_values(by = 'TV Shows Count', ascending = False).head(10)
| Mixed Country | TV Shows Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 178 | United States,Canada | 53 | 20 | 13 | 20 | 5 |
| 168 | United Kingdom,United States | 47 | 8 | 10 | 31 | 1 |
| 25 | Canada,United States | 42 | 14 | 14 | 19 | 1 |
| 227 | United States,United Kingdom | 25 | 7 | 6 | 14 | 0 |
| 155 | United Kingdom,Ireland | 8 | 3 | 1 | 4 | 0 |
| 207 | United States,Japan | 8 | 6 | 2 | 1 | 0 |
| 218 | United States,South Korea | 7 | 1 | 2 | 2 | 2 |
| 16 | Canada,France | 6 | 1 | 2 | 5 | 0 |
| 23 | Canada,United Kingdom | 6 | 2 | 3 | 1 | 0 |
| 145 | United Kingdom,Canada,United States | 5 | 2 | 0 | 3 | 0 |
# Country combinations ordered from most to fewest TV shows, rows renumbered from 0.
df_mixed_countries_high_tvshows = mixed_countries_data_tvshows.sort_values(by = 'TV Shows Count', ascending = False).reset_index(drop = True)
heading_text = '\nMixed Country with Highest Ever TV Shows Count are : All Platforms Combined\n'
print(heading_text)
df_mixed_countries_high_tvshows.head(5)
Mixed Country with Highest Ever TV Shows Count are : All Platforms Combined
| Mixed Country | TV Shows Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 0 | United States,Canada | 53 | 20 | 13 | 20 | 5 |
| 1 | United Kingdom,United States | 47 | 8 | 10 | 31 | 1 |
| 2 | Canada,United States | 42 | 14 | 14 | 19 | 1 |
| 3 | United States,United Kingdom | 25 | 7 | 6 | 14 | 0 |
| 4 | United Kingdom,Ireland | 8 | 3 | 1 | 4 | 0 |
# Horizontal bar chart of the 15 country combinations with the most TV shows.
fig = px.bar(y = df_mixed_countries_high_tvshows['Mixed Country'][:15],
             x = df_mixed_countries_high_tvshows['TV Shows Count'][:15],
             color = df_mixed_countries_high_tvshows['TV Shows Count'][:15],
             color_continuous_scale = 'Teal_r',
             # y lists the country combinations and x their TV show counts; the
             # previous labels and title described different quantities.
             labels = { 'y' : 'Mixed Country', 'x' : 'TV Shows Count'},
             title = 'Mixed Country with Highest TV Shows Count : All Platforms')
fig.update_layout(plot_bgcolor = 'white')
fig.show()
# Country combinations ordered from fewest to most TV shows, rows renumbered from 0.
df_mixed_countries_low_tvshows = mixed_countries_data_tvshows.sort_values(by = 'TV Shows Count', ascending = True).reset_index(drop = True)
heading_text = '\nMixed Country with Lowest Ever TV Shows Count are : All Platforms Combined\n'
print(heading_text)
df_mixed_countries_low_tvshows.head(5)
Mixed Country with Lowest Ever TV Shows Count are : All Platforms Combined
| Mixed Country | TV Shows Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 0 | United States,Ireland,United Kingdom | 1 | 0 | 0 | 1 | 0 |
| 1 | France,South Korea,Spain | 1 | 1 | 0 | 1 | 0 |
| 2 | France,South Korea,United States,Canada | 1 | 0 | 0 | 1 | 0 |
| 3 | France,United Kingdom,United States | 1 | 1 | 0 | 0 | 0 |
| 4 | France,United States | 1 | 1 | 0 | 1 | 0 |
# Horizontal bar chart of the 15 country combinations with the fewest TV shows.
fig = px.bar(y = df_mixed_countries_low_tvshows['Mixed Country'][:15],
             x = df_mixed_countries_low_tvshows['TV Shows Count'][:15],
             color = df_mixed_countries_low_tvshows['TV Shows Count'][:15],
             color_continuous_scale = 'Teal_r',
             # y lists the country combinations and x their TV show counts; the
             # previous labels and title described different quantities.
             labels = { 'y' : 'Mixed Country', 'x' : 'TV Shows Count'},
             title = 'Mixed Country with Lowest TV Shows Count : All Platforms')
fig.update_layout(plot_bgcolor = 'white')
fig.show()
# Gather the figures first, then print the mixed-country summary in one go.
available_title_total = df_tvshows_countries['Country'].count()
mixed_country_total = mixed_countries_data_tvshows['Mixed Country'].unique().shape[0]
mixed_country_sample = mixed_countries_data_tvshows.sort_values(by = 'TV Shows Count', ascending = False)['Mixed Country'].head(5).unique()
largest_mixed_count = mixed_countries_data_tvshows['TV Shows Count'].max()
largest_mixed_name = df_mixed_countries_high_tvshows['Mixed Country'][0]
smallest_mixed_count = mixed_countries_data_tvshows['TV Shows Count'].min()
smallest_mixed_name = df_mixed_countries_low_tvshows['Mixed Country'][0]
print(f'''
Total '{available_title_total}' Titles are available on All Platforms, out of which\n
You Can Choose to see TV Shows from Total '{mixed_country_total}' Mixed Country, They were Like this, \n
{mixed_country_sample} etc. \n
The Mixed Country with Highest TV Shows Count have '{largest_mixed_count}' TV Shows Available is '{largest_mixed_name}', &\n
The Mixed Country with Lowest TV Shows Count have '{smallest_mixed_count}' TV Shows Available is '{smallest_mixed_name}'
''')
Total '4883' Titles are available on All Platforms, out of which
You Can Choose to see TV Shows from Total '233' Mixed Country, They were Like this,
['United States,Canada' 'United Kingdom,United States'
'Canada,United States' 'United States,United Kingdom'
'United Kingdom,Ireland'] etc.
The Mixed Country with Highest TV Shows Count have '53' TV Shows Available is 'United States,Canada', &
The Mixed Country with Lowest TV Shows Count have '1' TV Shows Available is 'United States,Ireland,United Kingdom'
# Pie chart of TV show counts for the first ten rows of mixed_countries_data_tvshows.
first_ten_combinations = mixed_countries_data_tvshows.head(10)
fig = px.pie(
    first_ten_combinations,
    names = 'Mixed Country',
    values = 'TV Shows Count',
    color_discrete_sequence = px.colors.sequential.Teal,
)
fig.update_traces(title = 'TV Shows Count based on Mixed Country', textinfo = 'percent+label', textposition = 'inside')
fig.show()
# Rank every country combination by its Netflix count, once descending and once
# ascending, renumbering the rows from 0 in both results.
netflix_mixed_countries_high_tvshows = df_mixed_countries_high_tvshows.sort_values(by = 'Netflix', ascending = False).reset_index(drop = True)
netflix_mixed_countries_low_tvshows = df_mixed_countries_high_tvshows.sort_values(by = 'Netflix', ascending = True).reset_index(drop = True)
netflix_mixed_countries_high_tvshows.head(5)
| Mixed Country | TV Shows Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 0 | United States,Canada | 53 | 20 | 13 | 20 | 5 |
| 1 | Canada,United States | 42 | 14 | 14 | 19 | 1 |
| 2 | United Kingdom,United States | 47 | 8 | 10 | 31 | 1 |
| 3 | United States,United Kingdom | 25 | 7 | 6 | 14 | 0 |
| 4 | United States,Japan | 8 | 6 | 2 | 1 | 0 |
# Rank every country combination by its Hulu count, once descending and once
# ascending, renumbering the rows from 0 in both results.
hulu_mixed_countries_high_tvshows = df_mixed_countries_high_tvshows.sort_values(by = 'Hulu', ascending = False).reset_index(drop = True)
hulu_mixed_countries_low_tvshows = df_mixed_countries_high_tvshows.sort_values(by = 'Hulu', ascending = True).reset_index(drop = True)
hulu_mixed_countries_high_tvshows.head(5)
| Mixed Country | TV Shows Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 0 | Canada,United States | 42 | 14 | 14 | 19 | 1 |
| 1 | United States,Canada | 53 | 20 | 13 | 20 | 5 |
| 2 | United Kingdom,United States | 47 | 8 | 10 | 31 | 1 |
| 3 | United States,United Kingdom | 25 | 7 | 6 | 14 | 0 |
| 4 | Canada,United Kingdom | 6 | 2 | 3 | 1 | 0 |
# Rank every country combination by its Prime Video count, once descending and
# once ascending, renumbering the rows from 0 in both results.
prime_video_mixed_countries_high_tvshows = df_mixed_countries_high_tvshows.sort_values(by = 'Prime Video', ascending = False).reset_index(drop = True)
prime_video_mixed_countries_low_tvshows = df_mixed_countries_high_tvshows.sort_values(by = 'Prime Video', ascending = True).reset_index(drop = True)
prime_video_mixed_countries_high_tvshows.head(5)
| Mixed Country | TV Shows Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 0 | United Kingdom,United States | 47 | 8 | 10 | 31 | 1 |
| 1 | United States,Canada | 53 | 20 | 13 | 20 | 5 |
| 2 | Canada,United States | 42 | 14 | 14 | 19 | 1 |
| 3 | United States,United Kingdom | 25 | 7 | 6 | 14 | 0 |
| 4 | Canada,France | 6 | 1 | 2 | 5 | 0 |
# Rank every country combination by its Disney+ count, once descending and once
# ascending, renumbering the rows from 0 in both results.
disney_mixed_countries_high_tvshows = df_mixed_countries_high_tvshows.sort_values(by = 'Disney+', ascending = False).reset_index(drop = True)
disney_mixed_countries_low_tvshows = df_mixed_countries_high_tvshows.sort_values(by = 'Disney+', ascending = True).reset_index(drop = True)
disney_mixed_countries_high_tvshows.head(5)
| Mixed Country | TV Shows Count | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|
| 0 | United States,Canada | 53 | 20 | 13 | 20 | 5 |
| 1 | United States,India | 2 | 0 | 0 | 0 | 2 |
| 2 | United States,South Korea | 7 | 1 | 2 | 2 | 2 |
| 3 | United States,Hong Kong,South Korea,France,Can... | 1 | 0 | 0 | 0 | 1 |
| 4 | Ireland,United States | 2 | 0 | 0 | 1 | 1 |
# Distribution of the TV show count per country combination: histogram with a
# KDE curve on the left, box plot on the right.
f, ax = plt.subplots(1, 2 , figsize = (20, 5))
# sns.distplot is deprecated; histplot with a density scale and cut = 3 is the
# replacement seaborn documents for it (histplot is already used in this file).
sns.histplot(x = mixed_countries_data_tvshows['TV Shows Count'], bins = 20, kde = True, stat = 'density', kde_kws = dict(cut = 3), ax = ax[0])
# Name the axis explicitly: a positional Series is interpreted differently
# across seaborn versions.
sns.boxplot(x = mixed_countries_data_tvshows['TV Shows Count'], ax = ax[1])
plt.show()
# One frame per streaming platform, holding only the country combinations that
# have at least one title on that platform.
def _mixed_countries_on(platform):
    """Return the 'Mixed Country' and `platform` columns for rows with a non-zero
    `platform` count, sorted by that count in descending order."""
    on_platform = mixed_countries_data_tvshows[mixed_countries_data_tvshows[platform] != 0]
    ranked = on_platform.sort_values(by = platform, ascending = False).reset_index()
    other_platforms = [name for name in ('Netflix', 'Hulu', 'Prime Video', 'Disney+') if name != platform]
    return ranked.drop(['index'] + other_platforms + ['TV Shows Count'], axis = 1)

netflix_mixed_countries_tvshows = _mixed_countries_on('Netflix')
hulu_mixed_countries_tvshows = _mixed_countries_on('Hulu')
prime_video_mixed_countries_tvshows = _mixed_countries_on('Prime Video')
disney_mixed_countries_tvshows = _mixed_countries_on('Disney+')
# One shared figure: a histogram with KDE curve per platform, drawn from the
# first 100 rows of each per-platform frame.
plt.figure(figsize = (20, 5))
plt.title('Mixed Country TV Shows Count Per Platform')
# Drawing order is kept because the legend entries below rely on it.
mixed_layers = [
    (prime_video_mixed_countries_tvshows, 'Prime Video', 'lightblue'),
    (netflix_mixed_countries_tvshows, 'Netflix', 'red'),
    (hulu_mixed_countries_tvshows, 'Hulu', 'lightgreen'),
    (disney_mixed_countries_tvshows, 'Disney+', 'darkblue'),
]
for platform_frame, platform_name, platform_color in mixed_layers:
    sns.histplot(platform_frame[platform_name][:100], color = platform_color, legend = True, kde = True)
# Legend entries in drawing order
plt.legend([layer[1] for layer in mixed_layers])
plt.show()
# Summary of the extremes: for all platforms combined and for each platform,
# print the country combination in row 0 of the descending ("high") and
# ascending ("low") frames together with the max / min of the matching column.
# NOTE(review): the per-platform "low" frames still contain combinations with a
# count of 0, so the reported minimum is 0 whenever such a combination exists.
print(f'''
The Mixed Country with Highest TV Shows Count Ever Got is '{df_mixed_countries_high_tvshows['Mixed Country'][0]}' : '{df_mixed_countries_high_tvshows['TV Shows Count'].max()}'\n
The Mixed Country with Lowest TV Shows Count Ever Got is '{df_mixed_countries_low_tvshows['Mixed Country'][0]}' : '{df_mixed_countries_low_tvshows['TV Shows Count'].min()}'\n
The Mixed Country with Highest TV Shows Count on 'Netflix' is '{netflix_mixed_countries_high_tvshows['Mixed Country'][0]}' : '{netflix_mixed_countries_high_tvshows['Netflix'].max()}'\n
The Mixed Country with Lowest TV Shows Count on 'Netflix' is '{netflix_mixed_countries_low_tvshows['Mixed Country'][0]}' : '{netflix_mixed_countries_low_tvshows['Netflix'].min()}'\n
The Mixed Country with Highest TV Shows Count on 'Hulu' is '{hulu_mixed_countries_high_tvshows['Mixed Country'][0]}' : '{hulu_mixed_countries_high_tvshows['Hulu'].max()}'\n
The Mixed Country with Lowest TV Shows Count on 'Hulu' is '{hulu_mixed_countries_low_tvshows['Mixed Country'][0]}' : '{hulu_mixed_countries_low_tvshows['Hulu'].min()}'\n
The Mixed Country with Highest TV Shows Count on 'Prime Video' is '{prime_video_mixed_countries_high_tvshows['Mixed Country'][0]}' : '{prime_video_mixed_countries_high_tvshows['Prime Video'].max()}'\n
The Mixed Country with Lowest TV Shows Count on 'Prime Video' is '{prime_video_mixed_countries_low_tvshows['Mixed Country'][0]}' : '{prime_video_mixed_countries_low_tvshows['Prime Video'].min()}'\n
The Mixed Country with Highest TV Shows Count on 'Disney+' is '{disney_mixed_countries_high_tvshows['Mixed Country'][0]}' : '{disney_mixed_countries_high_tvshows['Disney+'].max()}'\n
The Mixed Country with Lowest TV Shows Count on 'Disney+' is '{disney_mixed_countries_low_tvshows['Mixed Country'][0]}' : '{disney_mixed_countries_low_tvshows['Disney+'].min()}'\n
''')
The Mixed Country with Highest TV Shows Count Ever Got is 'United States,Canada' : '53'
The Mixed Country with Lowest TV Shows Count Ever Got is 'United States,Ireland,United Kingdom' : '1'
The Mixed Country with Highest TV Shows Count on 'Netflix' is 'United States,Canada' : '20'
The Mixed Country with Lowest TV Shows Count on 'Netflix' is 'Japan,France' : '0'
The Mixed Country with Highest TV Shows Count on 'Hulu' is 'Canada,United States' : '14'
The Mixed Country with Lowest TV Shows Count on 'Hulu' is 'United States,United Kingdom,South Korea' : '0'
The Mixed Country with Highest TV Shows Count on 'Prime Video' is 'United Kingdom,United States' : '31'
The Mixed Country with Lowest TV Shows Count on 'Prime Video' is 'Japan,France' : '0'
The Mixed Country with Highest TV Shows Count on 'Disney+' is 'United States,Canada' : '5'
The Mixed Country with Lowest TV Shows Count on 'Disney+' is 'Japan,France' : '0'
# Report the mean count, rounded to two decimals: first over the combined
# frame's 'TV Shows Count', then over each per-platform frame's own column.
_average_sources = (
    ('Netflix', netflix_mixed_countries_tvshows),
    ('Hulu', hulu_mixed_countries_tvshows),
    ('Prime Video', prime_video_mixed_countries_tvshows),
    ('Disney+', disney_mixed_countries_tvshows),
)

_average_rows = [
    f"Accross All Platforms the Average TV Shows Count of Mixed Country is "
    f"'{round(mixed_countries_data_tvshows['TV Shows Count'].mean(), ndigits = 2)}'"
]
for _avg_platform, _avg_frame in _average_sources:
    _average_rows.append(
        f"The Average TV Shows Count of Mixed Country on '{_avg_platform}' is "
        f"'{round(_avg_frame[_avg_platform].mean(), ndigits = 2)}'"
    )

# Leading newline, then every row followed by a blank line
print('\n' + ''.join(f'{_row}\n\n' for _row in _average_rows))
Accross All Platforms the Average TV Shows Count of Mixed Country is '2.14'
The Average TV Shows Count of Mixed Country on 'Netflix' is '1.57'
The Average TV Shows Count of Mixed Country on 'Hulu' is '1.84'
The Average TV Shows Count of Mixed Country on 'Prime Video' is '1.89'
The Average TV Shows Count of Mixed Country on 'Disney+' is '1.3'
# Report how many distinct 'Mixed Country' values appear in the combined frame
# and in each per-platform frame.
_total_sources = (
    ('Netflix', netflix_mixed_countries_tvshows),
    ('Hulu', hulu_mixed_countries_tvshows),
    ('Prime Video', prime_video_mixed_countries_tvshows),
    ('Disney+', disney_mixed_countries_tvshows),
)

_total_rows = [
    f"Accross All Platforms Total Count of Mixed Country is "
    f"'{len(mixed_countries_data_tvshows['Mixed Country'].unique())}'"
]
for _total_platform, _total_frame in _total_sources:
    _total_rows.append(
        f"Total Count of Mixed Country on '{_total_platform}' is "
        f"'{len(_total_frame['Mixed Country'].unique())}'"
    )

# Leading newline, then every row followed by a blank line
print('\n' + ''.join(f'{_row}\n\n' for _row in _total_rows))
Accross All Platforms Total Count of Mixed Country is '233'
Total Count of Mixed Country on 'Netflix' is '125'
Total Count of Mixed Country on 'Hulu' is '61'
Total Count of Mixed Country on 'Prime Video' is '114'
Total Count of Mixed Country on 'Disney+' is '20'
# Line chart comparing the four platforms' TV show counts for the first five
# rows of the combined mixed-country frame.
plt.figure(figsize = (20, 5))

# (count column, line colour) in drawing order. Each line is given a label so
# the legend can identify the platform; without it the four lines are only
# distinguishable by colour, unlike the histogram/KDE charts which do set one.
_line_series = (
    ('Netflix', 'red'),
    ('Hulu', 'lightgreen'),
    ('Prime Video', 'lightblue'),
    ('Disney+', 'darkblue'),
)
for _line_platform, _line_colour in _line_series:
    sns.lineplot(
        x = mixed_countries_data_tvshows['Mixed Country'][:5],
        y = mixed_countries_data_tvshows[_line_platform][:5],
        color = _line_colour,
        label = _line_platform,
    )

# Shared axis titles (override the per-series y label seaborn sets on each call)
plt.xlabel('Mixed Country', fontsize = 15)
plt.ylabel('TV Shows Count', fontsize = 15)
# Build the legend from the labels attached to the lines above
plt.legend()
plt.show()
# 2x2 grid of horizontal bar charts: one panel per platform, each showing that
# platform's count for the first ten rows of the combined mixed-country frame.
fig, axes = plt.subplots(2, 2, figsize = (20 , 20))
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']

# axes.flat walks the grid row by row: (0, 0), (0, 1), (1, 0), (1, 1)
n_co_ax1, h_co_ax2, p_co_ax3, d_co_ax4 = [
    sns.barplot(
        y = mixed_countries_data_tvshows['Mixed Country'][:10],
        x = mixed_countries_data_tvshows[platform][:10],
        palette = shades,
        ax = panel,
    )
    for panel, platform, shades in zip(axes.flat, labels, ['Reds_r', 'Greens_r', 'Blues_r', 'BuPu_r'])
]

# Title every panel with its platform name
for _bar_panel, _bar_title in zip((n_co_ax1, h_co_ax2, p_co_ax3, d_co_ax4), labels):
    _bar_panel.title.set_text(_bar_title)
plt.show()
# 2x2 grid of line charts: one panel per platform, with the platform's count on
# the x axis and the mixed-country label on the y axis (first ten rows of the
# combined frame).
fig, axes = plt.subplots(2, 2, figsize = (20 , 10))
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']

# axes.flat walks the grid row by row: (0, 0), (0, 1), (1, 0), (1, 1)
n_mco_ax1, h_mco_ax2, p_mco_ax3, d_mco_ax4 = [
    sns.lineplot(
        y = mixed_countries_data_tvshows['Mixed Country'][:10],
        x = mixed_countries_data_tvshows[platform][:10],
        color = shade,
        ax = panel,
    )
    for panel, platform, shade in zip(axes.flat, labels, ['red', 'lightgreen', 'lightblue', 'darkblue'])
]

# Title every panel with its platform name
for _line_panel, _line_title in zip((n_mco_ax1, h_mco_ax2, p_mco_ax3, d_mco_ax4), labels):
    _line_panel.title.set_text(_line_title)
plt.show()
# Overlaid kernel density estimates of the per-platform counts, using the
# first 50 rows of every per-platform frame.
plt.figure(figsize = (20, 5))
plt.title('Mixed Country TV Shows Count Per Platform')

# (frame, count column, colour) in the order the curves are drawn
_kde_layers = (
    (netflix_mixed_countries_tvshows, 'Netflix', 'red'),
    (hulu_mixed_countries_tvshows, 'Hulu', 'green'),
    (prime_video_mixed_countries_tvshows, 'Prime Video', 'lightblue'),
    (disney_mixed_countries_tvshows, 'Disney+', 'darkblue'),
)
for _kde_frame, _kde_column, _kde_colour in _kde_layers:
    sns.kdeplot(_kde_frame[_kde_column][:50], color = _kde_colour, legend = True)

# Legend entries are listed in the same order as the curves above
plt.legend(['Netflix', 'Hulu', 'Prime Video', 'Disney+'])
plt.show()
# 2x2 grid of horizontal bar charts: one panel per platform, each drawn from
# the first ten rows of that platform's own mixed-country frame.
fig, axes = plt.subplots(2, 2, figsize = (20 , 20))
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']

# axes.flat walks the grid row by row: (0, 0), (0, 1), (1, 0), (1, 1)
n_mco_ax1, h_mco_ax2, p_mco_ax3, d_mco_ax4 = [
    sns.barplot(
        y = frame['Mixed Country'][:10],
        x = frame[platform][:10],
        palette = shades,
        ax = panel,
    )
    for panel, frame, platform, shades in zip(
        axes.flat,
        [
            netflix_mixed_countries_tvshows,
            hulu_mixed_countries_tvshows,
            prime_video_mixed_countries_tvshows,
            disney_mixed_countries_tvshows,
        ],
        labels,
        ['Reds_r', 'Greens_r', 'Blues_r', 'BuPu_r'],
    )
]

# Title every panel with its platform name
for _platform_panel, _platform_title in zip((n_mco_ax1, h_mco_ax2, p_mco_ax3, d_mco_ax4), labels):
    _platform_panel.title.set_text(_platform_title)
plt.show()
# Funnel chart of the overall 'TV Shows Count' for the first ten rows of the
# combined mixed-country frame.
fig = go.Figure(
    data = go.Funnel(
        x = mixed_countries_data_tvshows['TV Shows Count'][:10],
        y = mixed_countries_data_tvshows['Mixed Country'][:10],
    )
)
fig.show()